Hi Sir, you can see our homework as below.
require(data.table)
## Loading required package: data.table
require(glmnet)
## Loading required package: glmnet
## Loading required package: Matrix
## Loaded glmnet 3.0-2
consumption=fread("/Users/yagizarslan/Downloads/Consumption.csv")
Quesiton A- On the data preperation part, we have used the code we discussed in the class. From there, we have calculated mape values, examined in quartiles and plot the boxplot of the mape as below.
#Question A
set.seed(500)
setnames(consumption,names(consumption)[3],'value')
consumption[,date:=as.Date(Tarih,'%d.%m.%Y')]
consumption[,hour:=as.numeric(substr(Saat,1,2))]
consumption=consumption[,list(date,hour,value)]
consumption[,value:=gsub(".", "",value, fixed = TRUE)]
consumption[,value:=as.numeric(gsub(",", ".",value, fixed = TRUE))]
consumption[,lag_168:=shift(value,168)]
consumption[,lag_48:=shift(value,48)]
consumption[1:169]
## date hour value lag_168 lag_48
## 1: 2016-01-01 0 26277.24 NA NA
## 2: 2016-01-01 1 24991.82 NA NA
## 3: 2016-01-01 2 23532.61 NA NA
## 4: 2016-01-01 3 22464.78 NA NA
## 5: 2016-01-01 4 22002.91 NA NA
## ---
## 165: 2016-01-07 20 33784.72 NA 34301.59
## 166: 2016-01-07 21 32638.14 NA 33400.82
## 167: 2016-01-07 22 32739.98 NA 33359.37
## 168: 2016-01-07 23 31092.87 NA 31629.97
## 169: 2016-01-08 0 28602.02 26277.24 29189.27
full_data=consumption[complete.cases(consumption)]
Whole_Absolute_Percent_Error_48 <- ((full_data$value-full_data$lag_48)/full_data$value)*100
From_March_APE_48=Whole_Absolute_Percent_Error_48[36313:38232]
summary(From_March_APE_48)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -56.2674 -6.9192 -0.2331 -1.4510 5.1747 36.2524
quantile(From_March_APE_48, c(0.1,0.9), type=7)
## 10% 90%
## -18.65027 13.16205
boxplot(From_March_APE_48)
Whole_Absolute_Percent_Error_168 <- ((full_data$value-consumption$lag_168)/full_data$value)*100
## Warning in full_data$value - consumption$lag_168: longer object length is not a
## multiple of shorter object length
## Warning in (full_data$value - consumption$lag_168)/full_data$value: longer
## object length is not a multiple of shorter object length
From_March_APE_168=Whole_Absolute_Percent_Error_168[36313:38232]
summary(From_March_APE_168)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -57.425 -10.874 -3.632 -4.295 2.064 27.597
quantile(From_March_APE_168, c(0.1,0.9), type=7)
## 10% 90%
## -17.341579 8.378579
boxplot(From_March_APE_168)
Question B- Now we have created a linear regression with the data, predicted values with the model, calculated mape values and exxamined them in specified quartiles.
#Question B#
Till_March=full_data[1:36312]
From_March=full_data[36313:38232]
fit_lr=lm(value~-1+lag_168+lag_48,Till_March)
summary(fit_lr)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17038.2 -928.9 46.6 1012.1 16322.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.663755 0.003060 216.9 <2e-16 ***
## lag_48 0.335616 0.003059 109.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2381 on 36310 degrees of freedom
## Multiple R-squared: 0.9949, Adjusted R-squared: 0.9949
## F-statistic: 3.511e+06 on 2 and 36310 DF, p-value: < 2.2e-16
test_pred <- predict(fit_lr, newdata=From_March)
LR_Absolute_Percent_Error <- ((From_March$value-test_pred)/From_March$value)*100
summary(LR_Absolute_Percent_Error)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -39.571 -5.187 -1.045 -1.656 2.750 22.236
quantile(LR_Absolute_Percent_Error, c(0.1,0.9), type=7)
## 10% 90%
## -10.358522 6.293902
Question C - This is where tasks get a little bit more complicated. Since our task now to use only the 7 days and 2 days prior specific hour values for the model, we get those values for each ours, created the respective model, and evaluated the results. We iterated the process for all 24 hours.
#Question C
#For 1 AM
One_Train = Till_March$hour == 1
One_Train_Till_March=Till_March[One_Train,]
One_Test = From_March$hour == 1
One_Test_From_March=From_March[One_Test,]
lr_one=lm(value~-1+lag_168+lag_48,One_Train_Till_March)
summary(lr_one)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = One_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9512.1 -756.3 81.8 838.3 6792.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.46780 0.01666 28.07 <2e-16 ***
## lag_48 0.53197 0.01666 31.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1556 on 1511 degrees of freedom
## Multiple R-squared: 0.9972, Adjusted R-squared: 0.9972
## F-statistic: 2.724e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_one_pred <- predict(fit_lr, newdata=One_Test_From_March)
summary(test_one_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23290 25785 27131 27367 29133 30472
LR_One_APE <- ((One_Test_From_March$value-test_one_pred)/One_Test_From_March$value)*100
LR_One_APE
## 1 2 3 4 5 6
## -3.4596541 -3.4424496 0.3785183 2.6629443 -1.3173865 -0.8122070
## 7 8 9 10 11 12
## 0.0737294 -3.5695396 -5.9325325 -1.3901289 2.0946018 0.9103836
## 13 14 15 16 17 18
## -0.5116576 -0.6916574 -1.8706391 -1.9298452 6.0572730 5.9772362
## 19 20 21 22 23 24
## 3.2534162 3.7348674 1.9129534 -2.3793247 -6.7191721 -4.4368152
## 25 26 27 28 29 30
## -2.8185062 -6.1706328 -6.8559674 -7.2651243 -6.4024795 -5.3995771
## 31 32 33 34 35 36
## -3.1136427 -1.9582076 -4.8945923 -3.8421717 -2.5899157 -6.1965525
## 37 38 39 40 41 42
## -9.5715159 -2.5670729 -1.9405582 -4.0198968 -3.1555662 -9.2528288
## 43 44 45 46 47 48
## -12.8063134 -5.3813369 1.2049003 1.3972050 1.5834055 1.3268388
## 49 50 51 52 53 54
## -1.8970800 -0.3663476 0.5059692 5.5556754 8.1320154 -3.2771336
## 55 56 57 58 59 60
## -1.4959815 2.8399721 2.9391690 4.9264923 7.4374058 5.9950116
## 61 62 63 64 65 66
## 5.7057146 -3.5696304 -7.3230069 -3.0959041 -0.1579521 4.5440632
## 67 68 69 70 71 72
## 3.3522436 1.4769287 6.0199634 6.7468625 -1.4755883 -0.2573043
## 73 74 75 76 77 78
## 4.1954941 5.0255610 2.0394044 3.3776321 4.0360586 5.3325842
## 79 80
## 3.5907684 -0.4220023
summary(LR_One_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.8063 -3.4468 -0.6017 -0.5208 3.0177 8.1320
quantile(LR_One_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.217145 5.570679
boxplot(LR_One_APE)
####-----------------------------------------------------------------------------------------------
#For 2 AM
TWO_Train = Till_March$hour == 2
TWO_Train_Till_March=Till_March[TWO_Train,]
TWO_Test = From_March$hour == 2
TWO_Test_From_March=From_March[TWO_Test,]
TWO_Test_From_March
## date hour value lag_168 lag_48
## 1: 2020-03-01 2 27749.22 28490.57 29404.13
## 2: 2020-03-02 2 27069.13 27374.44 28989.58
## 3: 2020-03-03 2 29119.38 29675.84 27749.22
## 4: 2020-03-04 2 29029.98 28699.70 27069.13
## 5: 2020-03-05 2 28706.58 29272.87 29119.38
## 6: 2020-03-06 2 28942.61 29404.13 29029.98
## 7: 2020-03-07 2 28986.44 28989.58 28706.58
## 8: 2020-03-08 2 27143.40 27749.22 28942.61
## 9: 2020-03-09 2 26184.82 27069.13 28986.44
## 10: 2020-03-10 2 28518.93 29119.38 27143.40
## 11: 2020-03-11 2 28759.72 29029.98 26184.82
## 12: 2020-03-12 2 29007.07 28706.58 28518.93
## 13: 2020-03-13 2 28639.42 28942.61 28759.72
## 14: 2020-03-14 2 28856.82 28986.44 29007.07
## 15: 2020-03-15 2 27162.76 27143.40 28639.42
## 16: 2020-03-16 2 26560.10 26184.82 28856.82
## 17: 2020-03-17 2 29541.72 28518.93 27162.76
## 18: 2020-03-18 2 29545.75 28759.72 26560.10
## 19: 2020-03-19 2 30077.21 29007.07 29541.72
## 20: 2020-03-20 2 29998.87 28639.42 29545.75
## 21: 2020-03-21 2 29581.15 28856.82 30077.21
## 22: 2020-03-22 2 27159.70 27162.76 29998.87
## 23: 2020-03-23 2 25800.00 26560.10 29581.15
## 24: 2020-03-24 2 27678.55 29541.72 27159.70
## 25: 2020-03-25 2 27551.52 29545.75 25800.00
## 26: 2020-03-26 2 27510.99 30077.21 27678.55
## 27: 2020-03-27 2 27176.12 29998.87 27551.52
## 28: 2020-03-28 2 26837.56 29581.15 27510.99
## 29: 2020-03-29 2 25488.24 27159.70 27176.12
## 30: 2020-03-30 2 24682.72 25800.00 26837.56
## 31: 2020-03-31 2 26210.44 27678.55 25488.24
## 32: 2020-04-01 2 25761.10 27551.52 24682.72
## 33: 2020-04-02 2 25711.36 27510.99 26210.44
## 34: 2020-04-03 2 25457.48 27176.12 25761.10
## 35: 2020-04-04 2 25562.20 26837.56 25711.36
## 36: 2020-04-05 2 23957.32 25488.24 25457.48
## 37: 2020-04-06 2 22913.98 24682.72 25562.20
## 38: 2020-04-07 2 24993.28 26210.44 23957.32
## 39: 2020-04-08 2 24592.11 25761.10 22913.98
## 40: 2020-04-09 2 24437.63 25711.36 24993.28
## 41: 2020-04-10 2 24420.00 25457.48 24592.11
## 42: 2020-04-11 2 22871.91 25562.20 24437.63
## 43: 2020-04-12 2 21535.22 23957.32 24420.00
## 44: 2020-04-13 2 21851.54 22913.98 22871.91
## 45: 2020-04-14 2 23909.80 24993.28 21535.22
## 46: 2020-04-15 2 23903.55 24592.11 21851.54
## 47: 2020-04-16 2 24478.01 24437.63 23909.80
## 48: 2020-04-17 2 24661.77 24420.00 23903.55
## 49: 2020-04-18 2 23116.83 22871.91 24478.01
## 50: 2020-04-19 2 22436.26 21535.22 24661.77
## 51: 2020-04-20 2 22371.17 21851.54 23116.83
## 52: 2020-04-21 2 24848.15 23909.80 22436.26
## 53: 2020-04-22 2 25395.92 23903.55 22371.17
## 54: 2020-04-23 2 23676.88 24478.01 24848.15
## 55: 2020-04-24 2 25580.74 24661.77 25395.92
## 56: 2020-04-25 2 25273.98 23116.83 23676.88
## 57: 2020-04-26 2 24759.05 22436.26 25580.74
## 58: 2020-04-27 2 25256.48 22371.17 25273.98
## 59: 2020-04-28 2 27413.67 24848.15 24759.05
## 60: 2020-04-29 2 27535.47 25395.92 25256.48
## 61: 2020-04-30 2 27272.38 23676.88 27413.67
## 62: 2020-05-01 2 25233.59 25580.74 27535.47
## 63: 2020-05-02 2 24144.16 25273.98 27272.38
## 64: 2020-05-03 2 24338.57 24759.05 25233.59
## 65: 2020-05-04 2 24867.20 25256.48 24144.16
## 66: 2020-05-05 2 27679.61 27413.67 24338.57
## 67: 2020-05-06 2 27427.69 27535.47 24867.20
## 68: 2020-05-07 2 27726.15 27272.38 27679.61
## 69: 2020-05-08 2 27799.32 25233.59 27427.69
## 70: 2020-05-09 2 26937.68 24144.16 27726.15
## 71: 2020-05-10 2 25185.61 24338.57 27799.32
## 72: 2020-05-11 2 25406.19 24867.20 26937.68
## 73: 2020-05-12 2 27914.67 27679.61 25185.61
## 74: 2020-05-13 2 28221.92 27427.69 25406.19
## 75: 2020-05-14 2 28268.56 27726.15 27914.67
## 76: 2020-05-15 2 28421.38 27799.32 28221.92
## 77: 2020-05-16 2 28385.73 26937.68 28268.56
## 78: 2020-05-17 2 27556.82 25185.61 28421.38
## 79: 2020-05-18 2 27087.31 25406.19 28385.73
## 80: 2020-05-19 2 27496.64 27914.67 27556.82
## date hour value lag_168 lag_48
lr_TWO=lm(value~-1+lag_168+lag_48,TWO_Train_Till_March)
summary(lr_TWO)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TWO_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9295.5 -760.4 95.7 851.2 6667.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.46322 0.01669 27.76 <2e-16 ***
## lag_48 0.53652 0.01668 32.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1539 on 1511 degrees of freedom
## Multiple R-squared: 0.9971, Adjusted R-squared: 0.9971
## F-statistic: 2.581e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TWO_pred <- predict(fit_lr, newdata=TWO_Test_From_March)
summary(test_TWO_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22262 24913 26597 26445 28067 29260
LR_TWO_APE <- ((TWO_Test_From_March$value-test_TWO_pred)/TWO_Test_From_March$value)*100
LR_TWO_APE
## 1 2 3 4 5
## -3.711912e+00 -3.066779e+00 3.737010e-01 3.085038e+00 -1.729064e+00
## 6 7 8 9 10
## -1.096810e+00 3.797717e-01 -3.643164e+00 -5.769586e+00 2.841806e-01
## 11 12 13 14 15
## 2.444008e+00 1.315311e+00 -7.807274e-01 -4.099648e-01 -1.714285e+00
## 16 17 18 19 20
## -1.901379e+00 5.063650e+00 5.220230e+00 3.022078e+00 3.577780e+00
## 21 22 23 24 25
## 1.125402e+00 -3.452950e+00 -6.811236e+00 -3.775976e+00 -2.607853e+00
## 26 27 28 29 30
## -6.332974e+00 -7.295015e+00 -7.564756e+00 -6.512334e+00 -5.871582e+00
## 31 32 33 34 35
## -2.730167e+00 -3.145311e+00 -5.234387e+00 -4.818367e+00 -3.444541e+00
## 36 37 38 39 40
## -6.280155e+00 -8.939411e+00 -1.778409e+00 -8.020438e-01 -4.159777e+00
## 41 42 43 44 45
## -2.993561e+00 -1.004193e+01 -1.189821e+01 -4.731479e+00 3.882450e-01
## 46 47 48 49 50
## 1.032043e+00 9.514934e-01 1.745480e+00 -1.210027e+00 -6.004943e-01
## 51 52 53 54 55
## 4.860245e-01 5.827154e+00 7.960742e+00 -3.843205e+00 2.689902e+00
## 56 57 58 59 60
## 7.848924e+00 5.176172e+00 7.622433e+00 9.524665e+00 7.998156e+00
## 61 62 63 64 65
## 8.639777e+00 -3.911814e+00 -7.391479e+00 -2.317977e+00 -2.968193e-04
## 66 67 68 69 70
## 4.751672e+00 2.935217e+00 1.205574e+00 6.637695e+00 5.963915e+00
## 71 72 73 74 75
## -1.187691e+00 -5.520214e-01 3.902984e+00 5.279364e+00 1.756678e+00
## 76 77 78 79 80
## 1.751226e+00 3.587496e+00 4.721455e+00 2.573627e+00 -1.019627e+00
summary(LR_TWO_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -11.8982 -3.6604 -0.5763 -0.3529 2.9569 9.5247
quantile(LR_TWO_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.35091 5.84083
boxplot(LR_TWO_APE)
####-----------------------------------------------------------------------------------------------
#For3 AM
Three_Train = Till_March$hour == 3
Three_Train_Till_March=Till_March[Three_Train,]
Three_Test = From_March$hour == 3
Three_Test_From_March=From_March[Three_Test,]
lr_three=lm(value~-1+lag_168+lag_48,Three_Train_Till_March)
summary(lr_three)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = Three_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8955.7 -765.9 53.3 855.6 6452.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.46788 0.01667 28.06 <2e-16 ***
## lag_48 0.53186 0.01667 31.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1515 on 1511 degrees of freedom
## Multiple R-squared: 0.997, Adjusted R-squared: 0.997
## F-statistic: 2.526e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_three_pred <- predict(fit_lr, newdata=Three_Test_From_March)
summary(test_three_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21669 24474 26145 25839 27383 28602
LR_Three_APE <- ((Three_Test_From_March$value-test_three_pred)/Three_Test_From_March$value)*100
summary(LR_Three_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.6767 -3.6132 -0.6474 -0.4344 2.4732 11.0983
quantile(LR_Three_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.696216 5.663273
boxplot(LR_Three_APE)
####-----------------------------------------------------------------------------------------------
#For 4 AM
Four_Train = Till_March$hour == 4
Four_Train_Till_March=Till_March[Four_Train,]
Four_Test = From_March$hour ==4
Four_Test_From_March=From_March[Four_Test,]
lr_four=lm(value~-1+lag_168+lag_48,Four_Train_Till_March)
summary(lr_four)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = Four_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7870.9 -783.8 27.0 849.4 6727.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.48554 0.01651 29.41 <2e-16 ***
## lag_48 0.51423 0.01650 31.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1491 on 1511 degrees of freedom
## Multiple R-squared: 0.997, Adjusted R-squared: 0.997
## F-statistic: 2.547e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_four_pred <- predict(fit_lr, newdata=Four_Test_From_March)
summary(test_four_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21430 23948 25302 25406 27118 28440
LR_Four_APE <- ((Four_Test_From_March$value-test_four_pred)/Four_Test_From_March$value)*100
summary(LR_Four_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.7445 -3.7273 -0.8539 -0.6650 2.3104 9.9317
quantile(LR_Four_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.949318 5.715448
boxplot(LR_Four_APE)
#For 5 AM
FIVE_Train = Till_March$hour == 5
FIVE_Train_Till_March=Till_March[FIVE_Train,]
FIVE_Test = From_March$hour == 5
FIVE_Test_From_March=From_March[FIVE_Test,]
lr_FIVE=lm(value~-1+lag_168+lag_48,FIVE_Train_Till_March)
summary(lr_FIVE)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = FIVE_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7583.8 -768.1 13.5 864.2 6792.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.51863 0.01630 31.82 <2e-16 ***
## lag_48 0.48120 0.01629 29.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1484 on 1511 degrees of freedom
## Multiple R-squared: 0.997, Adjusted R-squared: 0.997
## F-statistic: 2.541e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_FIVE_pred <- predict(fit_lr, newdata=FIVE_Test_From_March)
summary(test_FIVE_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21407 22975 24458 25046 27615 28805
LR_FIVE_APE <- ((FIVE_Test_From_March$value-test_FIVE_pred)/FIVE_Test_From_March$value)*100
summary(LR_FIVE_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -13.1634 -4.1700 -0.9415 -1.2569 1.5554 7.9202
quantile(LR_FIVE_APE, c(0.1,0.9), type=7)
## 10% 90%
## -7.313464 4.622462
boxplot(LR_FIVE_APE)
####-----------------------------------------------------------------------------------------------
#For 6 AM
SIX_Train = Till_March$hour == 6
SIX_Train_Till_March=Till_March[SIX_Train,]
SIX_Test = From_March$hour == 6
SIX_Test_From_March=From_March[SIX_Test,]
lr_SIX=lm(value~-1+lag_168+lag_48,SIX_Train_Till_March)
summary(lr_SIX)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = SIX_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7236.9 -722.8 -12.9 838.9 7353.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.58829 0.01576 37.32 <2e-16 ***
## lag_48 0.41160 0.01575 26.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1567 on 1511 degrees of freedom
## Multiple R-squared: 0.9967, Adjusted R-squared: 0.9967
## F-statistic: 2.312e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_SIX_pred <- predict(fit_lr, newdata=SIX_Test_From_March)
summary(test_SIX_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19805 21775 23784 24660 28192 30087
LR_SIX_APE <- ((SIX_Test_From_March$value-test_SIX_pred)/SIX_Test_From_March$value)*100
summary(LR_SIX_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -15.758 -5.105 -1.629 -1.912 1.980 9.684
quantile(LR_SIX_APE, c(0.1,0.9), type=7)
## 10% 90%
## -9.086746 4.878278
boxplot(LR_SIX_APE)
####-----------------------------------------------------------------------------------------------
#For 7AM
SEVEN_Train = Till_March$hour == 7
SEVEN_Train_Till_March=Till_March[SEVEN_Train,]
SEVEN_Test = From_March$hour == 7
SEVEN_Test_From_March=From_March[SEVEN_Test,]
lr_SEVEN=lm(value~-1+lag_168+lag_48,SEVEN_Train_Till_March)
summary(lr_SEVEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = SEVEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8981.7 -821.4 62.4 901.2 9313.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.69729 0.01439 48.44 <2e-16 ***
## lag_48 0.30253 0.01438 21.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1931 on 1511 degrees of freedom
## Multiple R-squared: 0.9955, Adjusted R-squared: 0.9955
## F-statistic: 1.683e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_SEVEN_pred <- predict(fit_lr, newdata=SEVEN_Test_From_March)
summary(test_SEVEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18948 21419 23551 24839 28616 31563
LR_SEVEN_APE <- ((SEVEN_Test_From_March$value-test_SEVEN_pred)/SEVEN_Test_From_March$value)*100
summary(LR_SEVEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -19.006 -6.063 -1.940 -2.302 2.224 11.057
quantile(LR_SEVEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -10.37169 4.89504
boxplot(LR_SEVEN_APE)
####-----------------------------------------------------------------------------------------------
#For 8 AM
EIGHT_Train = Till_March$hour == 8
EIGHT_Train_Till_March=Till_March[EIGHT_Train,]
EIGHT_Test = From_March$hour == 8
EIGHT_Test_From_March=From_March[EIGHT_Test,]
lr_EIGHT=lm(value~-1+lag_168+lag_48,EIGHT_Train_Till_March)
summary(lr_EIGHT)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = EIGHT_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13560.9 -1057.6 89.8 1083.9 13579.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.77602 0.01295 59.91 <2e-16 ***
## lag_48 0.22342 0.01295 17.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2734 on 1511 degrees of freedom
## Multiple R-squared: 0.993, Adjusted R-squared: 0.993
## F-statistic: 1.069e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_EIGHT_pred <- predict(fit_lr, newdata=EIGHT_Test_From_March)
summary(test_EIGHT_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18788 23192 26283 27336 31859 35762
LR_EIGHT_APE <- ((EIGHT_Test_From_March$value-test_EIGHT_pred)/EIGHT_Test_From_March$value)*100
summary(LR_EIGHT_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -30.275 -7.550 -1.548 -3.076 3.200 18.230
quantile(LR_EIGHT_APE, c(0.1,0.9), type=7)
## 10% 90%
## -16.508616 7.992589
boxplot(LR_EIGHT_APE)
#For 9 AM
NINE_Train = Till_March$hour == 9
NINE_Train_Till_March=Till_March[NINE_Train,]
NINE_Test = From_March$hour == 9
NINE_Test_From_March=From_March[NINE_Test,]
NINE_Test_From_March
## date hour value lag_168 lag_48
## 1: 2020-03-01 9 27198.41 28974.88 38134.98
## 2: 2020-03-02 9 36323.03 38922.74 35697.28
## 3: 2020-03-03 9 36037.50 37661.25 27198.41
## 4: 2020-03-04 9 35706.28 36855.63 36323.03
## 5: 2020-03-05 9 36564.17 37102.36 36037.50
## 6: 2020-03-06 9 37175.66 38134.98 35706.28
## 7: 2020-03-07 9 33222.05 35697.28 36564.17
## 8: 2020-03-08 9 25124.34 27198.41 37175.66
## 9: 2020-03-09 9 34605.41 36323.03 33222.05
## 10: 2020-03-10 9 35872.00 36037.50 25124.34
## 11: 2020-03-11 9 35529.49 35706.28 34605.41
## 12: 2020-03-12 9 36107.54 36564.17 35872.00
## 13: 2020-03-13 9 36739.76 37175.66 35529.49
## 14: 2020-03-14 9 33087.52 33222.05 36107.54
## 15: 2020-03-15 9 25673.27 25124.34 36739.76
## 16: 2020-03-16 9 37526.49 34605.41 33087.52
## 17: 2020-03-17 9 37916.65 35872.00 25673.27
## 18: 2020-03-18 9 37135.17 35529.49 37526.49
## 19: 2020-03-19 9 37991.83 36107.54 37916.65
## 20: 2020-03-20 9 36671.00 36739.76 37135.17
## 21: 2020-03-21 9 31125.85 33087.52 37991.83
## 22: 2020-03-22 9 23466.82 25673.27 36671.00
## 23: 2020-03-23 9 32441.49 37526.49 31125.85
## 24: 2020-03-24 9 32708.37 37916.65 23466.82
## 25: 2020-03-25 9 32688.57 37135.17 32441.49
## 26: 2020-03-26 9 31400.95 37991.83 32708.37
## 27: 2020-03-27 9 32105.32 36671.00 32688.57
## 28: 2020-03-28 9 28462.85 31125.85 31400.95
## 29: 2020-03-29 9 23960.74 23466.82 32105.32
## 30: 2020-03-30 9 29960.33 32441.49 28462.85
## 31: 2020-03-31 9 29236.75 32708.37 23960.74
## 32: 2020-04-01 9 30122.86 32688.57 29960.33
## 33: 2020-04-02 9 29018.00 31400.95 29236.75
## 34: 2020-04-03 9 29902.29 32105.32 30122.86
## 35: 2020-04-04 9 25313.43 28462.85 29018.00
## 36: 2020-04-05 9 21951.15 23960.74 29902.29
## 37: 2020-04-06 9 27372.21 29960.33 25313.43
## 38: 2020-04-07 9 27013.32 29236.75 21951.15
## 39: 2020-04-08 9 26900.97 30122.86 27372.21
## 40: 2020-04-09 9 27352.13 29018.00 27013.32
## 41: 2020-04-10 9 26711.17 29902.29 26900.97
## 42: 2020-04-11 9 18677.36 25313.43 27352.13
## 43: 2020-04-12 9 17236.67 21951.15 26711.17
## 44: 2020-04-13 9 25331.52 27372.21 18677.36
## 45: 2020-04-14 9 25888.32 27013.32 17236.67
## 46: 2020-04-15 9 27396.48 26900.97 25331.52
## 47: 2020-04-16 9 27809.80 27352.13 25888.32
## 48: 2020-04-17 9 26026.07 26711.17 27396.48
## 49: 2020-04-18 9 20737.40 18677.36 27809.80
## 50: 2020-04-19 9 19124.48 17236.67 26026.07
## 51: 2020-04-20 9 26548.26 25331.52 20737.40
## 52: 2020-04-21 9 30000.31 25888.32 19124.48
## 53: 2020-04-22 9 29942.73 27396.48 26548.26
## 54: 2020-04-23 9 22653.01 27809.80 30000.31
## 55: 2020-04-24 9 22477.51 26026.07 29942.73
## 56: 2020-04-25 9 20616.87 20737.40 22653.01
## 57: 2020-04-26 9 18144.28 19124.48 22477.51
## 58: 2020-04-27 9 26071.28 26548.26 20616.87
## 59: 2020-04-28 9 26536.01 30000.31 18144.28
## 60: 2020-04-29 9 26660.87 29942.73 26071.28
## 61: 2020-04-30 9 27108.56 22653.01 26536.01
## 62: 2020-05-01 9 20746.90 22477.51 26660.87
## 63: 2020-05-02 9 19456.19 20616.87 27108.56
## 64: 2020-05-03 9 19297.35 18144.28 20746.90
## 65: 2020-05-04 9 27181.55 26071.28 19456.19
## 66: 2020-05-05 9 27373.41 26536.01 19297.35
## 67: 2020-05-06 9 27239.33 26660.87 27181.55
## 68: 2020-05-07 9 27637.44 27108.56 27373.41
## 69: 2020-05-08 9 27529.80 20746.90 27239.33
## 70: 2020-05-09 9 21838.66 19456.19 27637.44
## 71: 2020-05-10 9 18098.28 19297.35 27529.80
## 72: 2020-05-11 9 26935.06 27181.55 21838.66
## 73: 2020-05-12 9 27312.23 27373.41 18098.28
## 74: 2020-05-13 9 27652.68 27239.33 26935.06
## 75: 2020-05-14 9 28054.28 27637.44 27312.23
## 76: 2020-05-15 9 28589.10 27529.80 27652.68
## 77: 2020-05-16 9 25074.01 21838.66 28054.28
## 78: 2020-05-17 9 21902.16 18098.28 28589.10
## 79: 2020-05-18 9 25530.96 26935.06 25074.01
## 80: 2020-05-19 9 23559.87 27312.23 21902.16
## date hour value lag_168 lag_48
lr_NINE=lm(value~-1+lag_168+lag_48,NINE_Train_Till_March)
summary(lr_NINE)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = NINE_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15746.5 -1264.0 72.6 1294.5 15805.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.77076 0.01311 58.79 <2e-16 ***
## lag_48 0.22837 0.01310 17.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3078 on 1511 degrees of freedom
## Multiple R-squared: 0.9923, Adjusted R-squared: 0.9922
## F-statistic: 9.675e+04 on 2 and 1511 DF, p-value: < 2.2e-16
test_NINE_pred <- predict(fit_lr, newdata=NINE_Test_From_March)
summary(test_NINE_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19006 24388 27940 28784 33301 37816
LR_NINE_APE <- ((NINE_Test_From_March$value-test_NINE_pred)/NINE_Test_From_March$value)*100
summary(LR_NINE_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -39.108 -8.779 -2.272 -3.428 3.435 21.328
quantile(LR_NINE_APE, c(0.1,0.9), type=7)
## 10% 90%
## -17.807898 9.876017
boxplot(LR_NINE_APE)
####-----------------------------------------------------------------------------------------------
#For 10 AM
TEN_Train = Till_March$hour == 10
TEN_Train_Till_March=Till_March[TEN_Train,]
TEN_Test = From_March$hour == 10
TEN_Test_From_March=From_March[TEN_Test,]
lr_TEN=lm(value~-1+lag_168+lag_48,TEN_Train_Till_March)
summary(lr_TEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16408.5 -1334.3 113.5 1386.2 16525.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.73632 0.01384 53.21 <2e-16 ***
## lag_48 0.26265 0.01384 18.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3088 on 1511 degrees of freedom
## Multiple R-squared: 0.9925, Adjusted R-squared: 0.9925
## F-statistic: 1.004e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TEN_pred <- predict(fit_lr, newdata=TEN_Test_From_March)
summary(test_TEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19342 25199 28684 29287 33648 38082
LR_TEN_APE <- ((TEN_Test_From_March$value-test_TEN_pred)/TEN_Test_From_March$value)*100
summary(LR_TEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -39.571 -9.747 -2.388 -3.239 3.449 22.236
quantile(LR_TEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -16.35777 10.39703
boxplot(LR_TEN_APE)
####-----------------------------------------------------------------------------------------------
#For 11 AM
ELEVEN_Train = Till_March$hour == 11
ELEVEN_Train_Till_March=Till_March[ELEVEN_Train,]
ELEVEN_Test = From_March$hour == 11
ELEVEN_Test_From_March=From_March[ELEVEN_Test,]
lr_ELEVEN=lm(value~-1+lag_168+lag_48,ELEVEN_Train_Till_March)
summary(lr_ELEVEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = ELEVEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17186.8 -1407.3 49.7 1450.3 16670.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.7131 0.0143 49.87 <2e-16 ***
## lag_48 0.2858 0.0143 19.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3135 on 1511 degrees of freedom
## Multiple R-squared: 0.9925, Adjusted R-squared: 0.9925
## F-statistic: 1.004e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_ELEVEN_pred <- predict(fit_lr, newdata=ELEVEN_Test_From_March)
summary(test_ELEVEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20149 25894 29489 29910 33788 38356
LR_ELEVEN_APE <- ((ELEVEN_Test_From_March$value-test_ELEVEN_pred)/ELEVEN_Test_From_March$value)*100
summary(LR_ELEVEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -38.527 -9.398 -2.131 -3.023 3.416 21.770
quantile(LR_ELEVEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -16.54436 10.57046
boxplot(LR_ELEVEN_APE)
####-----------------------------------------------------------------------------------------------
#For 12 AM
TWELVE_Train = Till_March$hour == 12
TWELVE_Train_Till_March=Till_March[TWELVE_Train,]
TWELVE_Test = From_March$hour == 12
TWELVE_Test_From_March=From_March[TWELVE_Test,]
lr_TWELVE=lm(value~-1+lag_168+lag_48,TWELVE_Train_Till_March)
summary(lr_TWELVE)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TWELVE_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15625.2 -1404.2 26.6 1438.9 15407.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.65826 0.01522 43.25 <2e-16 ***
## lag_48 0.34056 0.01522 22.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2869 on 1511 degrees of freedom
## Multiple R-squared: 0.9933, Adjusted R-squared: 0.9933
## F-statistic: 1.117e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TWELVE_pred <- predict(fit_lr, newdata=TWELVE_Test_From_March)
summary(test_TWELVE_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20768 25486 28929 29254 32317 36709
LR_TWELVE_APE <- ((TWELVE_Test_From_March$value-test_TWELVE_pred)/TWELVE_Test_From_March$value)*100
LR_TWELVE_APE
## 1 2 3 4 5 6
## -11.4947755 -10.2828773 -0.5889137 0.2082033 1.9455873 1.9737588
## 7 8 9 10 11 12
## -15.6752322 -19.9024675 -2.6718947 12.2357533 1.3155633 -3.8523850
## 13 14 15 16 17 18
## 1.4274467 1.1608511 -4.9424317 16.6283961 14.8944177 7.6536667
## 19 20 21 22 23 24
## 10.1042589 3.7154576 -9.8023713 -22.3530746 -13.3098066 -3.5552932
## 25 26 27 28 29 30
## -8.8952879 -15.7575866 -7.4905620 -1.4969847 -2.9465250 -1.8885374
## 31 32 33 34 35 36
## -0.4511306 -2.6783363 -1.4046728 -3.4499482 -14.8250030 -19.4090361
## 37 38 39 40 41 42
## -1.5811330 -1.7050967 -10.9289516 -6.5022340 -10.9399054 -33.7262000
## 43 44 45 46 47 48
## -33.9532414 -1.3354779 3.9196608 5.7505949 3.8274179 -5.9872152
## 49 50 51 52 53 54
## -3.4709998 -2.3978406 9.5606671 21.5472466 7.8595436 -18.8428645
## 55 56 57 58 59 60
## -15.0259960 -3.7037356 -8.9137029 6.1330527 0.9341508 -6.2869838
## 61 62 63 64 65 66
## 11.0011247 -13.0202102 -11.2740518 4.4987427 12.3625005 10.7853342
## 67 68 69 70 71 72
## 0.2122031 3.5882793 14.7106366 -5.6212714 -24.3340682 2.5848384
## 73 74 75 76 77 78
## 8.4716005 1.0949415 -0.4939526 2.2019639 5.4814155 4.7226563
## 79 80
## -0.5879379 -3.8030458
summary(LR_TWELVE_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -33.953 -8.900 -1.539 -2.613 3.743 21.547
quantile(LR_TWELVE_APE, c(0.1,0.9), type=7)
## 10% 90%
## -15.68347 10.17237
boxplot(LR_TWELVE_APE)
####-----------------------------------------------------------------------------------------------
#For 13 AM
THIRTEEN_Train = Till_March$hour == 13
THIRTEEN_Train_Till_March=Till_March[THIRTEEN_Train,]
THIRTEEN_Test = From_March$hour == 13
THIRTEEN_Test_From_March=From_March[THIRTEEN_Test,]
lr_THIRTEEN=lm(value~-1+lag_168+lag_48,THIRTEEN_Train_Till_March)
summary(lr_THIRTEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = THIRTEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15676.2 -1423.2 -15.2 1471.3 15564.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.63015 0.01556 40.50 <2e-16 ***
## lag_48 0.36854 0.01556 23.69 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2939 on 1511 degrees of freedom
## Multiple R-squared: 0.9931, Adjusted R-squared: 0.9931
## F-statistic: 1.084e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_THIRTEEN_pred <- predict(fit_lr, newdata=THIRTEEN_Test_From_March)
summary(test_THIRTEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21485 26290 29395 29709 32412 37425
LR_THIRTEEN_APE <- ((THIRTEEN_Test_From_March$value-test_THIRTEEN_pred)/THIRTEEN_Test_From_March$value)*100
summary(LR_THIRTEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -33.611 -8.460 -1.213 -2.299 3.748 21.356
quantile(LR_THIRTEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -15.95339 10.43818
boxplot(LR_THIRTEEN_APE)
####-----------------------------------------------------------------------------------------------
#For 14 AM
FOURTEEN_Train = Till_March$hour == 14
FOURTEEN_Train_Till_March=Till_March[FOURTEEN_Train,]
FOURTEEN_Test = From_March$hour == 14
FOURTEEN_Test_From_March=From_March[FOURTEEN_Test,]
lr_FOURTEEN=lm(value~-1+lag_168+lag_48,FOURTEEN_Train_Till_March)
summary(lr_FOURTEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = FOURTEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16200.4 -1486.0 -23.5 1633.8 16433.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.6744 0.0149 45.26 <2e-16 ***
## lag_48 0.3242 0.0149 21.75 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3165 on 1511 degrees of freedom
## Multiple R-squared: 0.9923, Adjusted R-squared: 0.9923
## F-statistic: 9.703e+04 on 2 and 1511 DF, p-value: < 2.2e-16
test_FOURTEEN_pred <- predict(fit_lr, newdata=FOURTEEN_Test_From_March)
summary(test_FOURTEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21916 27223 30190 30345 33126 38210
LR_FOURTEEN_APE <- ((FOURTEEN_Test_From_March$value-test_FOURTEEN_pred)/FOURTEEN_Test_From_March$value)*100
summary(LR_FOURTEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -34.824 -8.722 -1.359 -2.205 3.642 20.989
quantile(LR_FOURTEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -15.02075 11.09291
boxplot(LR_FOURTEEN_APE)
#For 15 M
FIFTEEN_Train = Till_March$hour == 15
FIFTEEN_Train_Till_March=Till_March[FIFTEEN_Train,]
FIFTEEN_Test = From_March$hour == 15
FIFTEEN_Test_From_March=From_March[FIFTEEN_Test,]
lr_FIFTEEN=lm(value~-1+lag_168+lag_48,FIFTEEN_Train_Till_March)
summary(lr_FIFTEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = FIFTEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15967.7 -1532.6 -25.4 1635.1 15753.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.68070 0.01479 46.03 <2e-16 ***
## lag_48 0.31798 0.01479 21.50 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3127 on 1511 degrees of freedom
## Multiple R-squared: 0.9924, Adjusted R-squared: 0.9924
## F-statistic: 9.878e+04 on 2 and 1511 DF, p-value: < 2.2e-16
test_FIFTEEN_pred <- predict(fit_lr, newdata=FIFTEEN_Test_From_March)
summary(test_FIFTEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22312 27440 30374 30388 33183 38005
LR_FIFTEEN_APE <- ((FIFTEEN_Test_From_March$value-test_FIFTEEN_pred)/FIFTEEN_Test_From_March$value)*100
summary(LR_FIFTEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -32.910 -7.383 -1.062 -1.944 3.593 19.991
quantile(LR_FIFTEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -14.66144 10.44684
boxplot(LR_FIFTEEN_APE)
####-----------------------------------------------------------------------------------------------
#For 16 AM
SIXTEEN_Train = Till_March$hour == 16
SIXTEEN_Train_Till_March=Till_March[SIXTEEN_Train,]
SIXTEEN_Test = From_March$hour == 16
SIXTEEN_Test_From_March=From_March[SIXTEEN_Test,]
lr_SIXTEEN=lm(value~-1+lag_168+lag_48,SIXTEEN_Train_Till_March)
summary(lr_SIXTEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = SIXTEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15306.2 -1437.1 19.5 1518.8 15333.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.69343 0.01456 47.63 <2e-16 ***
## lag_48 0.30532 0.01456 20.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3042 on 1511 degrees of freedom
## Multiple R-squared: 0.9929, Adjusted R-squared: 0.9929
## F-statistic: 1.053e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_SIXTEEN_pred <- predict(fit_lr, newdata=SIXTEEN_Test_From_March)
summary(test_SIXTEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23080 28036 30688 30827 33552 38170
LR_SIXTEEN_APE <- ((SIXTEEN_Test_From_March$value-test_SIXTEEN_pred)/SIXTEEN_Test_From_March$value)*100
summary(LR_SIXTEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -29.3684 -6.2447 -0.7826 -1.8056 3.2476 18.2322
quantile(LR_SIXTEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -15.34655 9.43066
boxplot(LR_SIXTEEN_APE)
#For 17 AM
SEVENTEEN_Train = Till_March$hour == 17
SEVENTEEN_Train_Till_March=Till_March[SEVENTEEN_Train,]
SEVENTEEN_Test = From_March$hour == 17
SEVENTEEN_Test_From_March=From_March[SEVENTEEN_Test,]
lr_SEVENTEEN=lm(value~-1+lag_168+lag_48,SEVENTEEN_Train_Till_March)
summary(lr_SEVENTEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = SEVENTEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13984.5 -1300.4 33.3 1405.5 13687.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.68594 0.01462 46.91 <2e-16 ***
## lag_48 0.31307 0.01462 21.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2763 on 1511 degrees of freedom
## Multiple R-squared: 0.9942, Adjusted R-squared: 0.9941
## F-statistic: 1.285e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_SEVENTEEN_pred <- predict(fit_lr, newdata=SEVENTEEN_Test_From_March)
summary(test_SEVENTEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24326 28637 30902 31442 34045 38049
LR_SEVENTEEN_APE <- ((SEVENTEEN_Test_From_March$value-test_SEVENTEEN_pred)/SEVENTEEN_Test_From_March$value)*100
summary(LR_SEVENTEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -24.621 -5.329 -1.307 -1.607 2.361 15.735
quantile(LR_SEVENTEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -12.379733 8.382728
boxplot(LR_SEVENTEEN_APE)
####-----------------------------------------------------------------------------------------------
#For 18 AM
EIGHTEEN_Train = Till_March$hour == 18
EIGHTEEN_Train_Till_March=Till_March[EIGHTEEN_Train,]
EIGHTEEN_Train_Till_March
## date hour value lag_168 lag_48
## 1: 2016-01-08 18 35930.69 31438.11 35832.00
## 2: 2016-01-09 18 34407.27 33591.26 35841.62
## 3: 2016-01-10 18 31706.26 33151.03 35930.69
## 4: 2016-01-11 18 35697.30 37621.41 34407.27
## 5: 2016-01-12 18 35055.97 36954.78 31706.26
## ---
## 1509: 2020-02-25 18 38129.02 39088.69 34058.60
## 1510: 2020-02-26 18 37440.58 39201.23 38808.18
## 1511: 2020-02-27 18 37941.85 39959.80 38129.02
## 1512: 2020-02-28 18 37800.13 40477.52 37440.58
## 1513: 2020-02-29 18 36863.92 38128.89 37941.85
EIGHTEEN_Test = From_March$hour == 18
EIGHTEEN_Test_From_March=From_March[EIGHTEEN_Test,]
lr_EIGHTEEN=lm(value~-1+lag_168+lag_48,EIGHTEEN_Train_Till_March)
summary(lr_EIGHTEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = EIGHTEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12665.2 -1071.2 38.4 1167.4 11010.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.62063 0.01554 39.94 <2e-16 ***
## lag_48 0.37885 0.01554 24.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2310 on 1511 degrees of freedom
## Multiple R-squared: 0.9959, Adjusted R-squared: 0.9959
## F-statistic: 1.829e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_EIGHTEEN_pred <- predict(fit_lr, newdata=EIGHTEEN_Test_From_March)
summary(test_EIGHTEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25678 29493 31409 32094 35152 38131
LR_EIGHTEEN_APE <- ((EIGHTEEN_Test_From_March$value-test_EIGHTEEN_pred)/EIGHTEEN_Test_From_March$value)*100
summary(LR_EIGHTEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -19.384 -5.522 -1.093 -1.358 2.777 12.452
quantile(LR_EIGHTEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -8.513632 6.011521
boxplot(LR_EIGHTEEN_APE)
#For 19 AM
NINETEEN_Train = Till_March$hour == 19
NINETEEN_Train_Till_March=Till_March[NINETEEN_Train,]
NINETEEN_Test = From_March$hour == 19
NINETEEN_Test_From_March=From_March[NINETEEN_Test,]
lr_NINETEEN=lm(value~-1+lag_168+lag_48,NINETEEN_Train_Till_March)
summary(lr_NINETEEN)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = NINETEEN_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11634.3 -976.1 43.3 1029.0 9793.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.56759 0.01610 35.25 <2e-16 ***
## lag_48 0.43220 0.01609 26.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2054 on 1511 degrees of freedom
## Multiple R-squared: 0.9967, Adjusted R-squared: 0.9967
## F-statistic: 2.316e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_NINETEEN_pred <- predict(fit_lr, newdata=NINETEEN_Test_From_March)
summary(test_NINETEEN_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27709 30785 32351 33386 36626 38923
LR_NINETEEN_APE <- ((NINETEEN_Test_From_March$value-test_NINETEEN_pred)/NINETEEN_Test_From_March$value)*100
summary(LR_NINETEEN_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -16.704 -4.540 -1.033 -1.209 2.537 9.732
quantile(LR_NINETEEN_APE, c(0.1,0.9), type=7)
## 10% 90%
## -7.682427 4.945652
boxplot(LR_NINETEEN_APE)
####-----------------------------------------------------------------------------------------------
#For 20 AM
TWENTY_Train = Till_March$hour == 20
TWENTY_Train_Till_March=Till_March[TWENTY_Train,]
TWENTY_Test = From_March$hour == 20
TWENTY_Test_From_March=From_March[TWENTY_Test,]
TWENTY_Test_From_March
## date hour value lag_168 lag_48
## 1: 2020-03-01 20 34901.84 35408.62 37798.26
## 2: 2020-03-02 20 38103.82 38806.87 36701.12
## 3: 2020-03-03 20 37553.37 38791.40 34901.84
## 4: 2020-03-04 20 37516.53 37911.17 38103.82
## 5: 2020-03-05 20 37890.57 37677.11 37553.37
## 6: 2020-03-06 20 37612.94 37798.26 37516.53
## 7: 2020-03-07 20 35383.78 36701.12 37890.57
## 8: 2020-03-08 20 32991.99 34901.84 37612.94
## 9: 2020-03-09 20 36416.24 38103.82 35383.78
## 10: 2020-03-10 20 37179.01 37553.37 32991.99
## 11: 2020-03-11 20 37504.32 37516.53 36416.24
## 12: 2020-03-12 20 37257.39 37890.57 37179.01
## 13: 2020-03-13 20 37014.04 37612.94 37504.32
## 14: 2020-03-14 20 35737.63 35383.78 37257.39
## 15: 2020-03-15 20 33399.95 32991.99 37014.04
## 16: 2020-03-16 20 38549.07 36416.24 35737.63
## 17: 2020-03-17 20 38770.56 37179.01 33399.95
## 18: 2020-03-18 20 38745.12 37504.32 38549.07
## 19: 2020-03-19 20 39019.53 37257.39 38770.56
## 20: 2020-03-20 20 38333.68 37014.04 38745.12
## 21: 2020-03-21 20 36129.86 35737.63 39019.53
## 22: 2020-03-22 20 32958.42 33399.95 38333.68
## 23: 2020-03-23 20 35426.58 38549.07 36129.86
## 24: 2020-03-24 20 35189.92 38770.56 32958.42
## 25: 2020-03-25 20 35224.23 38745.12 35426.58
## 26: 2020-03-26 20 34961.60 39019.53 35189.92
## 27: 2020-03-27 20 34646.27 38333.68 35224.23
## 28: 2020-03-28 20 33738.72 36129.86 34961.60
## 29: 2020-03-29 20 32035.87 32958.42 34646.27
## 30: 2020-03-30 20 33860.82 35426.58 33738.72
## 31: 2020-03-31 20 33834.92 35189.92 32035.87
## 32: 2020-04-01 20 33450.80 35224.23 33860.82
## 33: 2020-04-02 20 33598.93 34961.60 33834.92
## 34: 2020-04-03 20 33215.35 34646.27 33450.80
## 35: 2020-04-04 20 31779.01 33738.72 33598.93
## 36: 2020-04-05 20 29666.51 32035.87 33215.35
## 37: 2020-04-06 20 31939.14 33860.82 31779.01
## 38: 2020-04-07 20 31516.03 33834.92 29666.51
## 39: 2020-04-08 20 31717.96 33450.80 31939.14
## 40: 2020-04-09 20 31798.93 33598.93 31516.03
## 41: 2020-04-10 20 31323.08 33215.35 31717.96
## 42: 2020-04-11 20 28196.37 31779.01 31798.93
## 43: 2020-04-12 20 27578.09 29666.51 31323.08
## 44: 2020-04-13 20 31013.27 31939.14 28196.37
## 45: 2020-04-14 20 30884.52 31516.03 27578.09
## 46: 2020-04-15 20 31804.28 31717.96 31013.27
## 47: 2020-04-16 20 31851.34 31798.93 30884.52
## 48: 2020-04-17 20 31404.30 31323.08 31804.28
## 49: 2020-04-18 20 29470.41 28196.37 31851.34
## 50: 2020-04-19 20 28329.87 27578.09 31404.30
## 51: 2020-04-20 20 32025.23 31013.27 29470.41
## 52: 2020-04-21 20 32698.83 30884.52 28329.87
## 53: 2020-04-22 20 32240.14 31804.28 32025.23
## 54: 2020-04-23 20 30192.30 31851.34 32698.83
## 55: 2020-04-24 20 30362.95 31404.30 32240.14
## 56: 2020-04-25 20 29790.26 29470.41 30192.30
## 57: 2020-04-26 20 28909.54 28329.87 30362.95
## 58: 2020-04-27 20 31927.58 32025.23 29790.26
## 59: 2020-04-28 20 31804.13 32698.83 28909.54
## 60: 2020-04-29 20 31766.89 32240.14 31927.58
## 61: 2020-04-30 20 30599.28 30192.30 31804.13
## 62: 2020-05-01 20 27846.77 30362.95 31766.89
## 63: 2020-05-02 20 28051.38 29790.26 30599.28
## 64: 2020-05-03 20 28031.78 28909.54 27846.77
## 65: 2020-05-04 20 31891.64 31927.58 28051.38
## 66: 2020-05-05 20 31451.23 31804.13 28031.78
## 67: 2020-05-06 20 31574.27 31766.89 31891.64
## 68: 2020-05-07 20 31807.91 30599.28 31451.23
## 69: 2020-05-08 20 31464.30 27846.77 31574.27
## 70: 2020-05-09 20 28860.33 28051.38 31807.91
## 71: 2020-05-10 20 28216.79 28031.78 31464.30
## 72: 2020-05-11 20 31739.99 31891.64 28860.33
## 73: 2020-05-12 20 31893.32 31451.23 28216.79
## 74: 2020-05-13 20 32294.07 31574.27 31739.99
## 75: 2020-05-14 20 32961.79 31807.91 31893.32
## 76: 2020-05-15 20 32956.35 31464.30 32294.07
## 77: 2020-05-16 20 32161.14 28860.33 32961.79
## 78: 2020-05-17 20 31481.96 28216.79 32956.35
## 79: 2020-05-18 20 31974.95 31739.99 32161.14
## 80: 2020-05-19 20 31370.75 31893.32 31481.96
## date hour value lag_168 lag_48
lr_TWENTY=lm(value~-1+lag_168+lag_48,TWENTY_Train_Till_March)
summary(lr_TWENTY)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TWENTY_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10310.6 -940.3 33.1 981.6 9029.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.54855 0.01629 33.67 <2e-16 ***
## lag_48 0.45129 0.01629 27.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1914 on 1511 degrees of freedom
## Multiple R-squared: 0.9972, Adjusted R-squared: 0.9972
## F-statistic: 2.65e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TWENTY_pred <- predict(fit_lr, newdata=TWENTY_Test_From_March)
summary(test_TWENTY_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28535 30790 32413 33325 36171 38076
LR_TWENTY_APE <- ((TWENTY_Test_From_March$value-test_TWENTY_pred)/TWENTY_Test_From_March$value)*100
summary(LR_TWENTY_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.6588 -3.8149 -0.9026 -1.1402 2.0174 8.2300
quantile(LR_TWENTY_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.798076 4.110301
boxplot(LR_TWENTY_APE)
#For 21 AM
TWENTYONE_Train = Till_March$hour == 21
TWENTYONE_Train_Till_March=Till_March[TWENTYONE_Train,]
TWENTYONE_Test = From_March$hour == 21
TWENTYONE_Test_From_March=From_March[TWENTYONE_Test,]
lr_TWENTYONE=lm(value~-1+lag_168+lag_48,TWENTYONE_Train_Till_March)
summary(lr_TWENTYONE)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TWENTYONE_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10045.0 -905.8 16.2 983.3 9074.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.52872 0.01647 32.11 <2e-16 ***
## lag_48 0.47110 0.01646 28.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1848 on 1511 degrees of freedom
## Multiple R-squared: 0.9973, Adjusted R-squared: 0.9972
## F-statistic: 2.74e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TWENTYONE_pred <- predict(fit_lr, newdata=TWENTYONE_Test_From_March)
summary(test_TWENTYONE_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27746 30440 31890 32538 35188 36949
LR_TWENTYONE_APE <- ((TWENTYONE_Test_From_March$value-test_TWENTYONE_pred)/TWENTYONE_Test_From_March$value)*100
summary(LR_TWENTYONE_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.8975 -4.2420 -0.8174 -0.9293 2.6597 8.3800
quantile(LR_TWENTYONE_APE, c(0.1,0.9), type=7)
## 10% 90%
## -7.231020 4.456819
boxplot(LR_TWENTYONE_APE)
####-----------------------------------------------------------------------------------------------
#For 22 AM
TWENTYTWO_Train = Till_March$hour == 22
TWENTYTWO_Train_Till_March=Till_March[TWENTYTWO_Train,]
TWENTYTWO_Test = From_March$hour == 22
TWENTYTWO_Test_From_March=From_March[TWENTYTWO_Test,]
lr_TWENTYTWO=lm(value~-1+lag_168+lag_48,TWENTYTWO_Train_Till_March)
summary(lr_TWENTYTWO)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TWENTYTWO_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10320.4 -870.0 32.3 955.9 8636.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.50976 0.01660 30.71 <2e-16 ***
## lag_48 0.48995 0.01659 29.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1830 on 1511 degrees of freedom
## Multiple R-squared: 0.9972, Adjusted R-squared: 0.9972
## F-statistic: 2.693e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TWENTYTWO_pred <- predict(fit_lr, newdata=TWENTYTWO_Test_From_March)
summary(test_TWENTYTWO_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 26958 29496 31083 31580 34151 35748
LR_TWENTYTWO_APE <- ((TWENTYTWO_Test_From_March$value-test_TWENTYTWO_pred)/TWENTYTWO_Test_From_March$value)*100
summary(LR_TWENTYTWO_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -13.2099 -3.7698 -0.4543 -0.8727 2.1555 8.8497
quantile(LR_TWENTYTWO_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.636470 4.937206
boxplot(LR_TWENTYTWO_APE)
#For 23 AM
TWENTYTHREE_Train = Till_March$hour == 23
TWENTYTHREE_Train_Till_March=Till_March[TWENTYTHREE_Train,]
TWENTYTHREE_Test = From_March$hour == 23
TWENTYTHREE_Test_From_March=From_March[TWENTYTHREE_Test,]
lr_TWENTYTHREE=lm(value~-1+lag_168+lag_48,TWENTYTHREE_Train_Till_March)
summary(lr_TWENTYTHREE)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = TWENTYTHREE_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9627.4 -857.7 79.2 898.9 8380.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.50471 0.01662 30.36 <2e-16 ***
## lag_48 0.49503 0.01662 29.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1739 on 1511 degrees of freedom
## Multiple R-squared: 0.9972, Adjusted R-squared: 0.9972
## F-statistic: 2.738e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_TWENTYTHREE_pred <- predict(fit_lr, newdata=TWENTYTHREE_Test_From_March)
summary(test_TWENTYTHREE_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25749 28327 29977 30323 32653 34230
LR_TWENTYTHREE_APE <- ((TWENTYTHREE_Test_From_March$value-test_TWENTYTHREE_pred)/TWENTYTHREE_Test_From_March$value)*100
summary(LR_TWENTYTHREE_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -13.4273 -3.6638 -0.6457 -0.8373 2.7343 8.7170
quantile(LR_TWENTYTHREE_APE, c(0.1,0.9), type=7)
## 10% 90%
## -6.799551 4.973260
boxplot(LR_TWENTYTHREE_APE)
####-----------------------------------------------------------------------------------------------
#For 0 AM
ZERO_Train = Till_March$hour == 0
ZERO_Train_Till_March=Till_March[ZERO_Train,]
ZERO_Test = From_March$hour == 0
ZERO_Test_From_March=From_March[ZERO_Test,]
lr_ZERO=lm(value~-1+lag_168+lag_48,ZERO_Train_Till_March)
summary(lr_ZERO)
##
## Call:
## lm(formula = value ~ -1 + lag_168 + lag_48, data = ZERO_Train_Till_March)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9256.6 -785.0 96.0 879.4 6710.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## lag_168 0.47201 0.01670 28.27 <2e-16 ***
## lag_48 0.52782 0.01669 31.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1584 on 1511 degrees of freedom
## Multiple R-squared: 0.9974, Adjusted R-squared: 0.9974
## F-statistic: 2.921e+05 on 2 and 1511 DF, p-value: < 2.2e-16
test_ZERO_pred <- predict(fit_lr, newdata=ZERO_Test_From_March)
summary(test_ZERO_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24548 27089 28633 28850 31030 32462
LR_ZERO_APE <- ((ZERO_Test_From_March$value-test_ZERO_pred)/ZERO_Test_From_March$value)*100
summary(LR_ZERO_APE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.7999 -3.3639 -0.5678 -0.7050 2.4006 8.7905
quantile(LR_ZERO_APE, c(0.1,0.9), type=7)
## 10% 90%
## -7.061885 5.096714
boxplot(LR_ZERO_APE)
QUESTION D -
Data Prep: In order to create the 48 predictors, we have get 24 predictors from each day first, then combined them, which gave us pre feature set. Then we combined the value column including the actual consumption in that specified day at the end.
Models & Evaluation: We have determined the min lambda value with 10-fold CV as specified. Then, predicted the values with a model with lambda value having lowest mse. Finally, calculated the mape values. What we have seen is that for each model in each our, the # of significant parameters and their distribution have changed. At the same time, error slope for various levels in each our has changed as well. However, the mean mape values generally close to each other
#Question D
wide_predictor1=dcast(full_data,date~paste0('lag_7days_',hour),value.var='lag_168')
wide_predictor1
## date lag_7days_0 lag_7days_1 lag_7days_10 lag_7days_11 lag_7days_12
## 1: 2016-01-08 26277.24 24991.82 27224.96 28908.04 28789.25
## 2: 2016-01-09 26224.60 24708.58 32102.38 33431.89 32910.61
## 3: 2016-01-10 27613.96 25779.28 28779.88 30204.30 30588.30
## 4: 2016-01-11 27103.09 25439.84 38606.67 39659.57 38416.32
## 5: 2016-01-12 29244.38 27637.23 37776.49 38664.22 37522.11
## ---
## 1589: 2020-05-15 28823.89 27867.03 27933.39 28860.92 28438.05
## 1590: 2020-05-16 28355.07 27285.61 22320.00 22898.85 23088.35
## 1591: 2020-05-17 26276.08 25140.01 18187.11 18872.01 19285.57
## 1592: 2020-05-18 26344.50 25579.00 27665.57 28103.67 27670.36
## 1593: 2020-05-19 29328.44 28071.83 27598.28 28281.32 27724.62
## lag_7days_13 lag_7days_14 lag_7days_15 lag_7days_16 lag_7days_17
## 1: 29367.70 29548.32 29390.89 30734.97 32048.02
## 2: 32887.61 32796.18 32594.55 33358.47 34387.95
## 3: 30902.63 31056.47 30979.53 32297.46 33462.96
## 4: 39279.26 39661.98 39211.00 39760.12 39633.29
## 5: 37820.69 38086.11 37834.89 38599.34 38669.63
## ---
## 1589: 29294.85 30504.06 30448.27 30671.43 30950.90
## 1590: 23994.63 24520.28 24808.20 25415.59 26402.35
## 1591: 20111.51 20883.75 21519.65 22469.65 24039.53
## 1592: 28571.60 29459.86 29666.89 30030.69 30644.19
## 1593: 28673.87 29681.74 29850.90 30535.80 31083.40
## lag_7days_18 lag_7days_19 lag_7days_2 lag_7days_20 lag_7days_21
## 1: 31438.11 30728.47 23532.61 30166.14 29461.28
## 2: 33591.26 32648.83 23771.58 31897.73 31049.20
## 3: 33151.03 32450.21 24566.31 32093.93 31315.16
## 4: 37621.41 35954.18 24395.07 35114.12 34066.40
## 5: 36954.78 35306.11 26335.78 34301.59 33400.82
## ---
## 1589: 31058.46 31685.50 27799.32 31464.30 31466.13
## 1590: 27972.52 29499.82 26937.68 28860.33 29210.51
## 1591: 26467.82 28236.14 25185.61 28216.79 28332.30
## 1592: 31264.81 32165.03 25406.19 31739.99 31912.72
## 1593: 31661.53 32431.97 27914.67 31893.32 32262.92
## lag_7days_22 lag_7days_23 lag_7days_3 lag_7days_4 lag_7days_5 lag_7days_6
## 1: 29242.83 28069.09 22464.78 22002.91 21957.08 22203.54
## 2: 30906.43 29621.09 22921.29 22870.89 23325.63 23604.98
## 3: 31014.19 29462.06 23878.42 23511.38 23672.32 23827.08
## 4: 33686.83 31733.54 23961.03 23860.63 24418.47 25708.59
## 5: 33359.37 31629.97 25870.23 25975.07 26318.48 27109.29
## ---
## 1589: 30411.30 29346.55 27108.78 26221.67 24542.32 23139.22
## 1590: 28406.86 27206.13 26309.66 25342.75 23416.64 21722.26
## 1591: 27520.26 26766.80 24585.00 23336.61 21324.52 19789.23
## 1592: 31301.20 30337.83 24876.69 23861.08 22338.17 21125.90
## 1593: 31651.02 30641.29 26983.37 26182.43 24280.86 22877.07
## lag_7days_7 lag_7days_8 lag_7days_9
## 1: 21844.16 23094.73 25202.27
## 2: 24022.70 26930.48 30043.60
## 3: 23405.11 24638.72 26802.38
## 4: 27455.39 32982.07 37175.84
## 5: 28570.22 33106.00 36657.12
## ---
## 1589: 23334.62 25910.58 27529.80
## 1590: 21211.21 21657.34 21838.66
## 1591: 18757.53 18050.22 18098.28
## 1592: 21709.74 25132.28 26935.06
## 1593: 23413.05 25826.35 27312.23
wide_predictor2=dcast(full_data,date~paste0('lag_2days_',hour),value.var='lag_48')
wide_predictor2
## date lag_2days_0 lag_2days_1 lag_2days_10 lag_2days_11 lag_2days_12
## 1: 2016-01-08 29189.27 27614.02 37212.91 37844.74 36194.13
## 2: 2016-01-09 28763.95 27284.84 36500.83 37350.92 35900.99
## 3: 2016-01-10 28602.02 27112.37 36843.20 37140.00 34622.56
## 4: 2016-01-11 29906.93 28061.98 34792.84 35382.85 34131.49
## 5: 2016-01-12 28890.37 27133.75 28136.07 29059.35 28947.35
## ---
## 1589: 2020-05-15 29653.44 28408.88 28479.78 29192.88 28272.81
## 1590: 2020-05-16 29654.33 28569.86 28629.95 29486.18 28767.50
## 1591: 2020-05-17 29866.82 29011.24 29135.62 29669.81 29003.34
## 1592: 2020-05-18 29347.36 28864.44 25826.00 26398.61 26428.49
## 1593: 2020-05-19 28922.42 27911.86 22440.56 23304.85 23651.87
## lag_2days_13 lag_2days_14 lag_2days_15 lag_2days_16 lag_2days_17
## 1: 36542.73 36847.22 36433.88 37581.09 37304.52
## 2: 36800.39 37376.83 37100.43 37668.65 37906.99
## 3: 36134.82 36555.58 36190.41 37031.72 37773.70
## 4: 33827.99 33524.80 32951.39 33655.89 35045.14
## 5: 28944.66 29089.83 29447.75 30569.07 32006.15
## ---
## 1589: 28751.81 29700.43 30292.85 30705.21 31098.51
## 1590: 29610.97 30392.52 30853.42 31308.87 31952.85
## 1591: 29856.70 31136.61 31603.25 31959.06 32283.64
## 1592: 27471.11 28300.69 28814.80 29410.35 30326.27
## 1593: 24672.99 25753.33 26486.85 27197.05 28663.59
## lag_2days_18 lag_2days_19 lag_2days_2 lag_2days_20 lag_2days_21
## 1: 35832.00 34499.94 26578.97 33670.00 32931.12
## 2: 35841.62 34621.65 26321.95 33784.72 32638.14
## 3: 35930.69 34710.46 25975.34 33907.65 33048.69
## 4: 34407.27 33494.32 26808.78 32624.31 32097.79
## 5: 31706.26 31305.87 25656.13 30870.26 30462.79
## ---
## 1589: 31729.46 32712.32 28221.92 32294.07 32650.98
## 1590: 32577.43 33509.02 28268.56 32961.79 33160.40
## 1591: 32703.70 33536.90 28421.38 32956.35 33253.20
## 1592: 31654.19 32935.41 28385.73 32161.14 32188.20
## 1593: 30360.19 31802.96 27556.82 31481.96 31349.23
## lag_2days_22 lag_2days_23 lag_2days_3 lag_2days_4 lag_2days_5 lag_2days_6
## 1: 32913.06 31450.65 25719.19 25864.63 25918.59 27091.94
## 2: 32739.98 31092.87 25748.49 25636.58 25932.52 26963.74
## 3: 33220.16 32021.60 25315.55 25128.15 25356.22 26338.72
## 4: 32176.63 30760.17 25798.80 25820.46 26035.77 26451.24
## 5: 30537.75 28857.66 24937.87 24538.16 24616.05 24478.14
## ---
## 1589: 31868.85 30952.90 27321.43 26574.47 24858.13 23656.06
## 1590: 32424.54 31454.48 27370.27 26614.99 24812.11 23606.27
## 1591: 32108.47 31057.29 27674.78 26809.79 25014.42 23884.93
## 1592: 31305.64 30113.52 27282.15 26273.67 24149.67 22924.34
## 1593: 30657.57 29582.26 26602.40 25372.84 23399.61 21838.52
## lag_2days_7 lag_2days_8 lag_2days_9
## 1: 28533.05 33203.34 36257.22
## 2: 28444.83 32804.27 35608.30
## 3: 28086.42 32702.24 35788.17
## 4: 26853.42 30627.32 33468.25
## 5: 23813.35 24676.92 26693.26
## ---
## 1589: 24014.48 26302.70 27652.68
## 1590: 23814.13 26537.74 28054.28
## 1591: 24216.42 26890.91 28589.10
## 1592: 22881.00 24326.07 25074.01
## 1593: 21472.99 21547.52 21902.16
pre_feature_set=merge(wide_predictor1,wide_predictor2, by='date')
pre_feature_set
## date lag_7days_0 lag_7days_1 lag_7days_10 lag_7days_11 lag_7days_12
## 1: 2016-01-08 26277.24 24991.82 27224.96 28908.04 28789.25
## 2: 2016-01-09 26224.60 24708.58 32102.38 33431.89 32910.61
## 3: 2016-01-10 27613.96 25779.28 28779.88 30204.30 30588.30
## 4: 2016-01-11 27103.09 25439.84 38606.67 39659.57 38416.32
## 5: 2016-01-12 29244.38 27637.23 37776.49 38664.22 37522.11
## ---
## 1589: 2020-05-15 28823.89 27867.03 27933.39 28860.92 28438.05
## 1590: 2020-05-16 28355.07 27285.61 22320.00 22898.85 23088.35
## 1591: 2020-05-17 26276.08 25140.01 18187.11 18872.01 19285.57
## 1592: 2020-05-18 26344.50 25579.00 27665.57 28103.67 27670.36
## 1593: 2020-05-19 29328.44 28071.83 27598.28 28281.32 27724.62
## lag_7days_13 lag_7days_14 lag_7days_15 lag_7days_16 lag_7days_17
## 1: 29367.70 29548.32 29390.89 30734.97 32048.02
## 2: 32887.61 32796.18 32594.55 33358.47 34387.95
## 3: 30902.63 31056.47 30979.53 32297.46 33462.96
## 4: 39279.26 39661.98 39211.00 39760.12 39633.29
## 5: 37820.69 38086.11 37834.89 38599.34 38669.63
## ---
## 1589: 29294.85 30504.06 30448.27 30671.43 30950.90
## 1590: 23994.63 24520.28 24808.20 25415.59 26402.35
## 1591: 20111.51 20883.75 21519.65 22469.65 24039.53
## 1592: 28571.60 29459.86 29666.89 30030.69 30644.19
## 1593: 28673.87 29681.74 29850.90 30535.80 31083.40
## lag_7days_18 lag_7days_19 lag_7days_2 lag_7days_20 lag_7days_21
## 1: 31438.11 30728.47 23532.61 30166.14 29461.28
## 2: 33591.26 32648.83 23771.58 31897.73 31049.20
## 3: 33151.03 32450.21 24566.31 32093.93 31315.16
## 4: 37621.41 35954.18 24395.07 35114.12 34066.40
## 5: 36954.78 35306.11 26335.78 34301.59 33400.82
## ---
## 1589: 31058.46 31685.50 27799.32 31464.30 31466.13
## 1590: 27972.52 29499.82 26937.68 28860.33 29210.51
## 1591: 26467.82 28236.14 25185.61 28216.79 28332.30
## 1592: 31264.81 32165.03 25406.19 31739.99 31912.72
## 1593: 31661.53 32431.97 27914.67 31893.32 32262.92
## lag_7days_22 lag_7days_23 lag_7days_3 lag_7days_4 lag_7days_5 lag_7days_6
## 1: 29242.83 28069.09 22464.78 22002.91 21957.08 22203.54
## 2: 30906.43 29621.09 22921.29 22870.89 23325.63 23604.98
## 3: 31014.19 29462.06 23878.42 23511.38 23672.32 23827.08
## 4: 33686.83 31733.54 23961.03 23860.63 24418.47 25708.59
## 5: 33359.37 31629.97 25870.23 25975.07 26318.48 27109.29
## ---
## 1589: 30411.30 29346.55 27108.78 26221.67 24542.32 23139.22
## 1590: 28406.86 27206.13 26309.66 25342.75 23416.64 21722.26
## 1591: 27520.26 26766.80 24585.00 23336.61 21324.52 19789.23
## 1592: 31301.20 30337.83 24876.69 23861.08 22338.17 21125.90
## 1593: 31651.02 30641.29 26983.37 26182.43 24280.86 22877.07
## lag_7days_7 lag_7days_8 lag_7days_9 lag_2days_0 lag_2days_1 lag_2days_10
## 1: 21844.16 23094.73 25202.27 29189.27 27614.02 37212.91
## 2: 24022.70 26930.48 30043.60 28763.95 27284.84 36500.83
## 3: 23405.11 24638.72 26802.38 28602.02 27112.37 36843.20
## 4: 27455.39 32982.07 37175.84 29906.93 28061.98 34792.84
## 5: 28570.22 33106.00 36657.12 28890.37 27133.75 28136.07
## ---
## 1589: 23334.62 25910.58 27529.80 29653.44 28408.88 28479.78
## 1590: 21211.21 21657.34 21838.66 29654.33 28569.86 28629.95
## 1591: 18757.53 18050.22 18098.28 29866.82 29011.24 29135.62
## 1592: 21709.74 25132.28 26935.06 29347.36 28864.44 25826.00
## 1593: 23413.05 25826.35 27312.23 28922.42 27911.86 22440.56
## lag_2days_11 lag_2days_12 lag_2days_13 lag_2days_14 lag_2days_15
## 1: 37844.74 36194.13 36542.73 36847.22 36433.88
## 2: 37350.92 35900.99 36800.39 37376.83 37100.43
## 3: 37140.00 34622.56 36134.82 36555.58 36190.41
## 4: 35382.85 34131.49 33827.99 33524.80 32951.39
## 5: 29059.35 28947.35 28944.66 29089.83 29447.75
## ---
## 1589: 29192.88 28272.81 28751.81 29700.43 30292.85
## 1590: 29486.18 28767.50 29610.97 30392.52 30853.42
## 1591: 29669.81 29003.34 29856.70 31136.61 31603.25
## 1592: 26398.61 26428.49 27471.11 28300.69 28814.80
## 1593: 23304.85 23651.87 24672.99 25753.33 26486.85
## lag_2days_16 lag_2days_17 lag_2days_18 lag_2days_19 lag_2days_2
## 1: 37581.09 37304.52 35832.00 34499.94 26578.97
## 2: 37668.65 37906.99 35841.62 34621.65 26321.95
## 3: 37031.72 37773.70 35930.69 34710.46 25975.34
## 4: 33655.89 35045.14 34407.27 33494.32 26808.78
## 5: 30569.07 32006.15 31706.26 31305.87 25656.13
## ---
## 1589: 30705.21 31098.51 31729.46 32712.32 28221.92
## 1590: 31308.87 31952.85 32577.43 33509.02 28268.56
## 1591: 31959.06 32283.64 32703.70 33536.90 28421.38
## 1592: 29410.35 30326.27 31654.19 32935.41 28385.73
## 1593: 27197.05 28663.59 30360.19 31802.96 27556.82
## lag_2days_20 lag_2days_21 lag_2days_22 lag_2days_23 lag_2days_3
## 1: 33670.00 32931.12 32913.06 31450.65 25719.19
## 2: 33784.72 32638.14 32739.98 31092.87 25748.49
## 3: 33907.65 33048.69 33220.16 32021.60 25315.55
## 4: 32624.31 32097.79 32176.63 30760.17 25798.80
## 5: 30870.26 30462.79 30537.75 28857.66 24937.87
## ---
## 1589: 32294.07 32650.98 31868.85 30952.90 27321.43
## 1590: 32961.79 33160.40 32424.54 31454.48 27370.27
## 1591: 32956.35 33253.20 32108.47 31057.29 27674.78
## 1592: 32161.14 32188.20 31305.64 30113.52 27282.15
## 1593: 31481.96 31349.23 30657.57 29582.26 26602.40
## lag_2days_4 lag_2days_5 lag_2days_6 lag_2days_7 lag_2days_8 lag_2days_9
## 1: 25864.63 25918.59 27091.94 28533.05 33203.34 36257.22
## 2: 25636.58 25932.52 26963.74 28444.83 32804.27 35608.30
## 3: 25128.15 25356.22 26338.72 28086.42 32702.24 35788.17
## 4: 25820.46 26035.77 26451.24 26853.42 30627.32 33468.25
## 5: 24538.16 24616.05 24478.14 23813.35 24676.92 26693.26
## ---
## 1589: 26574.47 24858.13 23656.06 24014.48 26302.70 27652.68
## 1590: 26614.99 24812.11 23606.27 23814.13 26537.74 28054.28
## 1591: 26809.79 25014.42 23884.93 24216.42 26890.91 28589.10
## 1592: 26273.67 24149.67 22924.34 22881.00 24326.07 25074.01
## 1593: 25372.84 23399.61 21838.52 21472.99 21547.52 21902.16
#targets
target0=full_data[hour==0]
target1=full_data[hour==1]
target2=full_data[hour==2]
target3=full_data[hour==3]
target4=full_data[hour==4]
target5=full_data[hour==5]
target6=full_data[hour==6]
target7=full_data[hour==7]
target8=full_data[hour==8]
target9=full_data[hour==9]
target10=full_data[hour==10]
target11=full_data[hour==11]
target12=full_data[hour==12]
target13=full_data[hour==13]
target14=full_data[hour==14]
target15=full_data[hour==15]
target16=full_data[hour==16]
target17=full_data[hour==17]
target18=full_data[hour==18]
target19=full_data[hour==19]
target20=full_data[hour==20]
target21=full_data[hour==21]
target22=full_data[hour==22]
target23=full_data[hour==23]
#final_feature_sets
final_feature_set_zero=merge(pre_feature_set,target0[,list(date,value)],by='date')
final_feature_set_one=merge(pre_feature_set,target1[,list(date,value)],by='date')
final_feature_set_two=merge(pre_feature_set,target2[,list(date,value)],by='date')
final_feature_set_three=merge(pre_feature_set,target3[,list(date,value)],by='date')
final_feature_set_four=merge(pre_feature_set,target4[,list(date,value)],by='date')
final_feature_set_five=merge(pre_feature_set,target5[,list(date,value)],by='date')
final_feature_set_six=merge(pre_feature_set,target6[,list(date,value)],by='date')
final_feature_set_seven=merge(pre_feature_set,target7[,list(date,value)],by='date')
final_feature_set_eight=merge(pre_feature_set,target8[,list(date,value)],by='date')
final_feature_set_nine=merge(pre_feature_set,target9[,list(date,value)],by='date')
final_feature_set_ten=merge(pre_feature_set,target10[,list(date,value)],by='date')
final_feature_set_eleven=merge(pre_feature_set,target11[,list(date,value)],by='date')
final_feature_set_twelve=merge(pre_feature_set,target12[,list(date,value)],by='date')
final_feature_set_thirteen=merge(pre_feature_set,target13[,list(date,value)],by='date')
final_feature_set_fourteen=merge(pre_feature_set,target14[,list(date,value)],by='date')
final_feature_set_fifteen=merge(pre_feature_set,target15[,list(date,value)],by='date')
final_feature_set_sixteen=merge(pre_feature_set,target16[,list(date,value)],by='date')
final_feature_set_seventeen=merge(pre_feature_set,target17[,list(date,value)],by='date')
final_feature_set_eighteen=merge(pre_feature_set,target18[,list(date,value)],by='date')
final_feature_set_nineteen=merge(pre_feature_set,target19[,list(date,value)],by='date')
final_feature_set_twenty=merge(pre_feature_set,target20[,list(date,value)],by='date')
final_feature_set_TWENTYone=merge(pre_feature_set,target21[,list(date,value)],by='date')
final_feature_set_twentytwo=merge(pre_feature_set,target22[,list(date,value)],by='date')
final_feature_set_twentythree=merge(pre_feature_set,target23[,list(date,value)],by='date')
##QUESTION D MODELS SECTION
###-----------------------------------------------------------------------------------
### REPEAT MODELS SECTION FOR 24 HOURS
###-----------------------------------------------------------------------------------
###ZERO
str(final_feature_set_zero)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 28602 29907 28890 27131 29162 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_zero<- final_feature_set_zero[c(1:1513),c(2:50)]
str(train_zero)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 28602 29907 28890 27131 29162 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_zero <- final_feature_set_zero[c(1514:1593),c(2:49)]
train_zero_mat= as.matrix(train_zero[,-49])
train_zero_target= train_zero[,49]
train_zero_target_mat= as.matrix(train_zero_target)
test_zero_mat= as.matrix(test_zero)
fit_cvglmnet_zero=cv.glmnet(train_zero_mat,train_zero_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_zero)
## List of 11
## $ lambda : num [1:100] 2444 2227 2029 1849 1685 ...
## $ cvm : num [1:100] 8032689 7048061 6203245 5501832 4919441 ...
## $ cvsd : num [1:100] 237239 214478 186910 164120 145396 ...
## $ cvup : num [1:100] 8269928 7262539 6390155 5665952 5064837 ...
## $ cvlo : num [1:100] 7795450 6833583 6016334 5337712 4774045 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 2 2 2 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_zero_mat, y = train_zero_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 31043 28608 26389 24367 22524 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1573] 40 40 40 40 40 0 40 0 40 0 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 7 9 11 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1573] 0.0738 0.1411 0.2024 0.2582 0.3091 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 2 2 2 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2444 2227 2029 1849 1685 ...
## ..$ dev.ratio: num [1:100] 0 0.126 0.231 0.318 0.39 ...
## ..$ nulldev : num 1.22e+10
## ..$ npasses : int 5950
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_zero_mat, y = train_zero_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.389
## $ lambda.1se: num 11.1
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_zero,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.165312e+03
## lag_7days_0 1.301807e-01
## lag_7days_1 1.123452e-01
## lag_7days_10 .
## lag_7days_11 .
## lag_7days_12 1.301218e-02
## lag_7days_13 .
## lag_7days_14 6.924886e-02
## lag_7days_15 8.194070e-02
## lag_7days_16 .
## lag_7days_17 -8.815951e-02
## lag_7days_18 .
## lag_7days_19 -1.298709e-01
## lag_7days_2 -1.212898e-01
## lag_7days_20 -2.114393e-01
## lag_7days_21 2.619157e-03
## lag_7days_22 -6.594589e-02
## lag_7days_23 3.852123e-02
## lag_7days_3 -9.917847e-02
## lag_7days_4 .
## lag_7days_5 .
## lag_7days_6 1.366126e-01
## lag_7days_7 2.462622e-01
## lag_7days_8 .
## lag_7days_9 -5.728500e-02
## lag_2days_0 -3.051305e-01
## lag_2days_1 -2.256434e-01
## lag_2days_10 -2.542962e-01
## lag_2days_11 -2.005519e-01
## lag_2days_12 1.621539e-01
## lag_2days_13 5.432935e-01
## lag_2days_14 -3.987006e-01
## lag_2days_15 1.001551e-01
## lag_2days_16 1.650704e-01
## lag_2days_17 3.010504e-02
## lag_2days_18 -6.991155e-02
## lag_2days_19 .
## lag_2days_2 -1.027664e-01
## lag_2days_20 -2.581932e-01
## lag_2days_21 1.538007e-01
## lag_2days_22 3.441022e-01
## lag_2days_23 1.025497e+00
## lag_2days_3 .
## lag_2days_4 4.308649e-01
## lag_2days_5 .
## lag_2days_6 -1.922691e-02
## lag_2days_7 4.290359e-01
## lag_2days_8 .
## lag_2days_9 -5.571962e-01
plot(fit_cvglmnet_zero)
fit_cvglmnet_zero
##
## Call: cv.glmnet(x = train_zero_mat, y = train_zero_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.389 717861 68307 36
## 1se 11.085 785103 76034 18
test_zero_pred=predict(fit_cvglmnet_zero, newx=test_zero_mat,s=c("lambda.min"))
test_zero_actual=final_feature_set_zero[c(1514:1593),c(50)]
test_zero_mape=((test_zero_actual-test_zero_pred)/test_zero_actual)*100
summary(test_zero_mape)
## value
## Min. :-7.7927
## 1st Qu.:-3.7155
## Median :-1.1533
## Mean :-1.2096
## 3rd Qu.: 0.7053
## Max. : 6.1696
boxplot(test_zero_mape)
###-----------------------------------------------------------------------------------
###ONE
str(final_feature_set_one)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 27112 28062 27134 25403 27519 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_one<- final_feature_set_one[c(1:1513),c(2:50)]
str(train_one)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 27112 28062 27134 25403 27519 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_one <- final_feature_set_one[c(1514:1593),c(2:49)]
train_one_mat= as.matrix(train_one[,-49])
train_one_target= train_one[,49]
train_one_target_mat= as.matrix(train_one_target)
test_one_mat= as.matrix(test_one)
fit_cvglmnet_one=cv.glmnet(train_one_mat,train_one_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_one)
## List of 11
## $ lambda : num [1:100] 2351 2142 1952 1778 1620 ...
## $ cvm : num [1:100] 7632278 6727738 5946781 5298404 4760055 ...
## $ cvsd : num [1:100] 325789 297146 264010 236575 213854 ...
## $ cvup : num [1:100] 7958067 7024885 6210791 5534979 4973909 ...
## $ cvlo : num [1:100] 7306489 6430592 5682771 5061829 4546201 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_one_mat, y = train_one_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 29447 27105 24971 23026 21254 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1592] 40 40 40 40 1 40 1 40 1 40 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 6 8 10 12 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1592] 7.10e-02 1.36e-01 1.95e-01 2.48e-01 7.25e-05 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2351 2142 1952 1778 1620 ...
## ..$ dev.ratio: num [1:100] 0 0.123 0.224 0.309 0.379 ...
## ..$ nulldev : num 1.16e+10
## ..$ npasses : int 6750
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_one_mat, y = train_one_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.718
## $ lambda.1se: num 8.06
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_one,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.160538e+03
## lag_7days_0 8.383615e-04
## lag_7days_1 9.543930e-02
## lag_7days_10 .
## lag_7days_11 .
## lag_7days_12 .
## lag_7days_13 5.755387e-03
## lag_7days_14 6.011636e-02
## lag_7days_15 9.249076e-02
## lag_7days_16 .
## lag_7days_17 -6.320004e-02
## lag_7days_18 -3.592084e-02
## lag_7days_19 -1.273145e-01
## lag_7days_2 -4.204038e-03
## lag_7days_20 -1.779232e-01
## lag_7days_21 2.294157e-03
## lag_7days_22 -4.628108e-02
## lag_7days_23 1.972426e-03
## lag_7days_3 -5.636410e-02
## lag_7days_4 .
## lag_7days_5 .
## lag_7days_6 1.129425e-01
## lag_7days_7 2.493198e-01
## lag_7days_8 .
## lag_7days_9 -5.161132e-02
## lag_2days_0 -5.168947e-01
## lag_2days_1 -4.339461e-02
## lag_2days_10 -2.702989e-01
## lag_2days_11 -2.632964e-01
## lag_2days_12 1.218496e-01
## lag_2days_13 4.804767e-01
## lag_2days_14 -2.313618e-01
## lag_2days_15 7.167565e-02
## lag_2days_16 1.423860e-01
## lag_2days_17 .
## lag_2days_18 -3.513565e-02
## lag_2days_19 2.134670e-02
## lag_2days_2 .
## lag_2days_20 -2.937882e-01
## lag_2days_21 1.180804e-01
## lag_2days_22 3.213087e-01
## lag_2days_23 1.047082e+00
## lag_2days_3 .
## lag_2days_4 4.076952e-01
## lag_2days_5 -1.181515e-02
## lag_2days_6 -1.626310e-02
## lag_2days_7 3.782888e-01
## lag_2days_8 1.204575e-03
## lag_2days_9 -4.661783e-01
plot(fit_cvglmnet_one)
fit_cvglmnet_one
##
## Call: cv.glmnet(x = train_one_mat, y = train_one_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.718 729770 56467 38
## 1se 8.064 783589 67985 19
test_one_pred=predict(fit_cvglmnet_one, newx=test_one_mat,s=c("lambda.min"))
test_one_actual=final_feature_set_one[c(1514:1593),c(50)]
test_one_mape=((test_one_actual-test_one_pred)/test_one_actual)*100
summary(test_one_mape)
## value
## Min. :-9.1339
## 1st Qu.:-3.9874
## Median :-1.2988
## Mean :-1.3966
## 3rd Qu.: 0.7813
## Max. : 5.2955
boxplot(test_one_mape)
###-----------------------------------------------------------------------------------
###TWO
str(final_feature_set_two)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25975 26809 25656 24453 26256 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_two<- final_feature_set_two[c(1:1513),c(2:50)]
str(train_two)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25975 26809 25656 24453 26256 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_two <- final_feature_set_two[c(1514:1593),c(2:49)]
train_two_mat= as.matrix(train_two[,-49])
train_two_target= train_two[,49]
train_two_target_mat= as.matrix(train_two_target)
test_two_mat= as.matrix(test_two)
fit_cvglmnet_two=cv.glmnet(train_two_mat,train_two_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_two)
## List of 11
## $ lambda : num [1:100] 2226 2028 1848 1684 1534 ...
## $ cvm : num [1:100] 7128056 6325038 5621944 5038174 4552570 ...
## $ cvsd : num [1:100] 287595 267291 236027 210059 188850 ...
## $ cvup : num [1:100] 7415651 6592329 5857970 5248232 4741419 ...
## $ cvlo : num [1:100] 6840462 6057747 5385917 4828115 4363720 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_two_mat, y = train_two_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 28335 26117 24096 22254 20576 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1633] 40 40 40 40 12 40 12 40 12 40 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 6 8 10 12 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1633] 0.06723 0.12849 0.18431 0.23517 0.00845 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2226 2028 1848 1684 1534 ...
## ..$ dev.ratio: num [1:100] 0 0.118 0.216 0.297 0.365 ...
## ..$ nulldev : num 1.08e+10
## ..$ npasses : int 6994
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_two_mat, y = train_two_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.564
## $ lambda.1se: num 6.34
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_two,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.644439e+03
## lag_7days_0 .
## lag_7days_1 1.740410e-02
## lag_7days_10 -2.103699e-04
## lag_7days_11 -8.144695e-03
## lag_7days_12 -1.941349e-04
## lag_7days_13 3.175318e-02
## lag_7days_14 2.311148e-02
## lag_7days_15 9.732343e-02
## lag_7days_16 1.024151e-02
## lag_7days_17 -3.258678e-02
## lag_7days_18 -6.133421e-02
## lag_7days_19 -1.388156e-01
## lag_7days_2 .
## lag_7days_20 -1.353157e-01
## lag_7days_21 1.826394e-03
## lag_7days_22 -7.286732e-02
## lag_7days_23 2.969446e-03
## lag_7days_3 .
## lag_7days_4 .
## lag_7days_5 .
## lag_7days_6 7.505346e-02
## lag_7days_7 2.882159e-01
## lag_7days_8 3.240868e-02
## lag_7days_9 -7.953254e-02
## lag_2days_0 -6.496945e-01
## lag_2days_1 -1.662833e-01
## lag_2days_10 -2.270955e-01
## lag_2days_11 -3.183013e-01
## lag_2days_12 1.173740e-01
## lag_2days_13 4.853127e-01
## lag_2days_14 -2.226273e-01
## lag_2days_15 5.774947e-02
## lag_2days_16 1.526549e-01
## lag_2days_17 .
## lag_2days_18 -6.812333e-02
## lag_2days_19 1.096732e-01
## lag_2days_2 .
## lag_2days_20 -4.143523e-01
## lag_2days_21 1.941751e-01
## lag_2days_22 3.064717e-01
## lag_2days_23 1.019653e+00
## lag_2days_3 1.330597e-01
## lag_2days_4 6.212849e-01
## lag_2days_5 -1.907754e-02
## lag_2days_6 -5.881519e-02
## lag_2days_7 3.323402e-01
## lag_2days_8 2.881194e-02
## lag_2days_9 -4.657943e-01
plot(fit_cvglmnet_two)
fit_cvglmnet_two
##
## Call: cv.glmnet(x = train_two_mat, y = train_two_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.564 748376 46954 41
## 1se 6.340 792663 48062 20
test_two_pred=predict(fit_cvglmnet_two, newx=test_two_mat,s=c("lambda.min"))
test_two_actual=final_feature_set_two[c(1514:1593),c(50)]
test_two_mape=((test_two_actual-test_two_pred)/test_two_actual)*100
summary(test_two_mape)
## value
## Min. :-8.7982
## 1st Qu.:-4.3838
## Median :-1.1976
## Mean :-1.5191
## 3rd Qu.: 0.6355
## Max. : 8.7563
boxplot(test_two_mape)
###-----------------------------------------------------------------------------------
###THREE
str(final_feature_set_three)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25316 25799 24938 23960 25766 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_three<- final_feature_set_three[c(1:1513),c(2:50)]
str(train_three)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25316 25799 24938 23960 25766 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_three <- final_feature_set_three[c(1514:1593),c(2:49)]
train_three_mat= as.matrix(train_three[,-49])
train_three_target= train_three[,49]
train_three_target_mat= as.matrix(train_three_target)
test_three_mat= as.matrix(test_three)
fit_cvglmnet_three=cv.glmnet(train_three_mat,train_three_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_three)
## List of 11
## $ lambda : num [1:100] 2129 1940 1768 1611 1468 ...
## $ cvm : num [1:100] 6633989 5893781 5253810 4722500 4281064 ...
## $ cvsd : num [1:100] 238418 222786 199497 179753 163070 ...
## $ cvup : num [1:100] 6872407 6116567 5453307 4902253 4444134 ...
## $ cvlo : num [1:100] 6395571 5670995 5054313 4542747 4117994 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 2 2 2 3 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_three_mat, y = train_three_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 27610 25488 23555 21794 20189 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1607] 40 40 40 40 12 40 12 40 12 40 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 6 8 10 13 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1607] 0.0643 0.1229 0.1763 0.2249 0.0104 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 2 2 2 3 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2129 1940 1768 1611 1468 ...
## ..$ dev.ratio: num [1:100] 0 0.116 0.212 0.292 0.358 ...
## ..$ nulldev : num 1.01e+10
## ..$ npasses : int 7221
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_three_mat, y = train_three_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 1.04
## $ lambda.1se: num 9.66
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_three,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.441386e+03
## lag_7days_0 .
## lag_7days_1 .
## lag_7days_10 -2.916747e-03
## lag_7days_11 .
## lag_7days_12 .
## lag_7days_13 1.834746e-02
## lag_7days_14 4.877833e-02
## lag_7days_15 7.428903e-02
## lag_7days_16 .
## lag_7days_17 -9.338486e-03
## lag_7days_18 -9.120742e-02
## lag_7days_19 -9.573096e-02
## lag_7days_2 -1.289817e-02
## lag_7days_20 -1.767753e-01
## lag_7days_21 .
## lag_7days_22 -3.930973e-02
## lag_7days_23 .
## lag_7days_3 .
## lag_7days_4 .
## lag_7days_5 .
## lag_7days_6 7.323004e-02
## lag_7days_7 3.015668e-01
## lag_7days_8 .
## lag_7days_9 -3.814260e-02
## lag_2days_0 -6.524683e-01
## lag_2days_1 -1.903993e-01
## lag_2days_10 -2.311068e-01
## lag_2days_11 -2.532332e-01
## lag_2days_12 3.145126e-02
## lag_2days_13 4.478521e-01
## lag_2days_14 -8.601152e-02
## lag_2days_15 4.360256e-05
## lag_2days_16 9.720077e-02
## lag_2days_17 .
## lag_2days_18 .
## lag_2days_19 1.561741e-03
## lag_2days_2 .
## lag_2days_20 -2.301611e-01
## lag_2days_21 1.505341e-01
## lag_2days_22 2.459200e-01
## lag_2days_23 9.981160e-01
## lag_2days_3 1.240297e-01
## lag_2days_4 6.414610e-01
## lag_2days_5 .
## lag_2days_6 -1.342929e-04
## lag_2days_7 2.517353e-01
## lag_2days_8 .
## lag_2days_9 -4.250961e-01
plot(fit_cvglmnet_three)
fit_cvglmnet_three
##
## Call: cv.glmnet(x = train_three_mat, y = train_three_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 1.035 762035 66757 32
## 1se 9.656 822034 70472 17
test_three_pred=predict(fit_cvglmnet_three, newx=test_three_mat,s=c("lambda.min"))
test_three_actual=final_feature_set_three[c(1514:1593),c(50)]
test_three_mape=((test_three_actual-test_three_pred)/test_three_actual)*100
summary(test_three_mape)
## value
## Min. :-9.7974
## 1st Qu.:-4.1876
## Median :-0.6795
## Mean :-1.2845
## 3rd Qu.: 0.8234
## Max. :11.0652
boxplot(test_three_mape)
###-----------------------------------------------------------------------------------
###FOUR
str(final_feature_set_four)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25128 25820 24538 23839 25809 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_four<- final_feature_set_four[c(1:1513),c(2:50)]
str(train_four)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25128 25820 24538 23839 25809 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_four <- final_feature_set_four[c(1514:1593),c(2:49)]
train_four_mat= as.matrix(train_four[,-49])
train_four_target= train_four[,49]
train_four_target_mat= as.matrix(train_four_target)
test_four_mat= as.matrix(test_four)
fit_cvglmnet_four=cv.glmnet(train_four_mat,train_four_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_four)
## List of 11
## $ lambda : num [1:100] 2032 1852 1687 1537 1401 ...
## $ cvm : num [1:100] 6152913 5496900 4912375 4427074 4022798 ...
## $ cvsd : num [1:100] 284651 269078 237718 211084 188800 ...
## $ cvup : num [1:100] 6437564 5765978 5150092 4638158 4211598 ...
## $ cvlo : num [1:100] 5868263 5227821 4674657 4215990 3833998 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_four_mat, y = train_four_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 27292 25267 23422 21741 20209 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1657] 40 40 40 40 18 40 18 40 18 40 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 6 8 10 12 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1657] 0.0614 0.1173 0.1683 0.2147 0.0143 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2032 1852 1687 1537 1401 ...
## ..$ dev.ratio: num [1:100] 0 0.114 0.208 0.286 0.351 ...
## ..$ nulldev : num 9.34e+09
## ..$ npasses : int 7554
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_four_mat, y = train_four_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.469
## $ lambda.1se: num 10.1
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_four,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.605538e+03
## lag_7days_0 8.097210e-03
## lag_7days_1 1.459070e-02
## lag_7days_10 .
## lag_7days_11 -4.678500e-02
## lag_7days_12 -2.619254e-02
## lag_7days_13 3.715356e-02
## lag_7days_14 5.635711e-02
## lag_7days_15 1.260070e-01
## lag_7days_16 .
## lag_7days_17 -6.772824e-02
## lag_7days_18 -1.626645e-02
## lag_7days_19 -1.406507e-01
## lag_7days_2 -7.949454e-02
## lag_7days_20 -1.784303e-01
## lag_7days_21 5.820539e-03
## lag_7days_22 -4.137511e-02
## lag_7days_23 .
## lag_7days_3 .
## lag_7days_4 .
## lag_7days_5 .
## lag_7days_6 1.218977e-01
## lag_7days_7 2.992965e-01
## lag_7days_8 3.681700e-02
## lag_7days_9 -5.698921e-02
## lag_2days_0 -5.703007e-01
## lag_2days_1 -3.758718e-01
## lag_2days_10 -1.711866e-01
## lag_2days_11 -2.629477e-01
## lag_2days_12 4.652528e-02
## lag_2days_13 5.944737e-01
## lag_2days_14 -3.452018e-01
## lag_2days_15 1.106028e-01
## lag_2days_16 1.145510e-01
## lag_2days_17 .
## lag_2days_18 -4.740625e-02
## lag_2days_19 .
## lag_2days_2 -1.474708e-01
## lag_2days_20 -1.575286e-01
## lag_2days_21 1.327974e-01
## lag_2days_22 2.622905e-01
## lag_2days_23 9.261506e-01
## lag_2days_3 7.973923e-02
## lag_2days_4 8.708110e-01
## lag_2days_5 9.791695e-02
## lag_2days_6 -3.091708e-02
## lag_2days_7 2.786435e-01
## lag_2days_8 2.749321e-02
## lag_2days_9 -5.112061e-01
plot(fit_cvglmnet_four)
fit_cvglmnet_four
##
## Call: cv.glmnet(x = train_four_mat, y = train_four_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.469 745490 80083 40
## 1se 10.114 823992 94570 20
test_four_pred=predict(fit_cvglmnet_four, newx=test_four_mat,s=c("lambda.min"))
test_four_actual=final_feature_set_four[c(1514:1593),c(50)]
test_four_mape=((test_four_actual-test_four_pred)/test_four_actual)*100
summary(test_four_mape)
## value
## Min. :-11.2133
## 1st Qu.: -5.0244
## Median : -1.4924
## Mean : -1.8886
## 3rd Qu.: 0.8328
## Max. : 8.6561
boxplot(test_four_mape)
###-----------------------------------------------------------------------------------
###FIVE
str(final_feature_set_five)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25356 26036 24616 24358 26222 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_five<- final_feature_set_five[c(1:1513),c(2:50)]
str(train_five)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 25356 26036 24616 24358 26222 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_five <- final_feature_set_five[c(1514:1593),c(2:49)]
train_five_mat= as.matrix(train_five[,-49])
train_five_target= train_five[,49]
train_five_target_mat= as.matrix(train_five_target)
test_five_mat= as.matrix(test_five)
fit_cvglmnet_five=cv.glmnet(train_five_mat,train_five_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_five)
## List of 11
## $ lambda : num [1:82] 1851 1687 1537 1400 1276 ...
## $ cvm : num [1:82] 5619587 5063442 4577350 4168277 3768482 ...
## $ cvsd : num [1:82] 214063 200949 183032 169690 161681 ...
## $ cvup : num [1:82] 5833650 5264391 4760383 4337966 3930163 ...
## $ cvlo : num [1:82] 5405524 4862493 4394318 3998587 3606800 ...
## $ nzero : Named int [1:82] 0 2 2 2 3 3 3 4 5 5 ...
## ..- attr(*, "names")= chr [1:82] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_five_mat, y = train_five_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:82] 27143 25296 23599 22054 20336 ...
## .. ..- attr(*, "names")= chr [1:82] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1087] 37 38 37 38 37 38 19 37 38 19 ...
## .. .. ..@ p : int [1:83] 0 0 2 4 6 9 12 15 19 24 ...
## .. .. ..@ Dim : int [1:2] 48 82
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:82] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1087] 0.00763 0.04489 0.03155 0.06889 0.05322 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:82] 0 2 2 2 3 3 3 4 5 5 ...
## ..$ dim : int [1:2] 48 82
## ..$ lambda : num [1:82] 1851 1687 1537 1400 1276 ...
## ..$ dev.ratio: num [1:82] 0 0.104 0.19 0.262 0.334 ...
## ..$ nulldev : num 8.51e+09
## ..$ npasses : int 6373
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_five_mat, y = train_five_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.988
## $ lambda.1se: num 8.4
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_five,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.184974e+03
## lag_7days_0 .
## lag_7days_1 .
## lag_7days_10 .
## lag_7days_11 -4.629175e-02
## lag_7days_12 -6.875195e-05
## lag_7days_13 2.696575e-02
## lag_7days_14 2.233138e-02
## lag_7days_15 1.198788e-01
## lag_7days_16 .
## lag_7days_17 -2.612922e-04
## lag_7days_18 -8.254086e-02
## lag_7days_19 -6.238603e-02
## lag_7days_2 -8.145052e-02
## lag_7days_20 -2.149422e-01
## lag_7days_21 -4.166258e-02
## lag_7days_22 -1.685350e-02
## lag_7days_23 .
## lag_7days_3 .
## lag_7days_4 -5.476213e-02
## lag_7days_5 .
## lag_7days_6 2.092657e-01
## lag_7days_7 3.242225e-01
## lag_7days_8 .
## lag_7days_9 -3.018469e-02
## lag_2days_0 -5.433988e-01
## lag_2days_1 -2.720642e-01
## lag_2days_10 -2.217513e-01
## lag_2days_11 -2.092873e-01
## lag_2days_12 .
## lag_2days_13 6.102113e-01
## lag_2days_14 -2.124473e-01
## lag_2days_15 4.873015e-05
## lag_2days_16 5.694649e-02
## lag_2days_17 .
## lag_2days_18 -3.606971e-03
## lag_2days_19 .
## lag_2days_2 -3.191625e-01
## lag_2days_20 2.265303e-02
## lag_2days_21 .
## lag_2days_22 2.824068e-01
## lag_2days_23 8.522033e-01
## lag_2days_3 .
## lag_2days_4 4.739650e-01
## lag_2days_5 6.712546e-01
## lag_2days_6 .
## lag_2days_7 1.681913e-01
## lag_2days_8 .
## lag_2days_9 -4.523556e-01
plot(fit_cvglmnet_five)
fit_cvglmnet_five
##
## Call: cv.glmnet(x = train_five_mat, y = train_five_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.988 742654 71236 33
## 1se 8.396 807691 84719 20
test_five_pred=predict(fit_cvglmnet_five, newx=test_five_mat,s=c("lambda.min"))
test_five_actual=final_feature_set_five[c(1514:1593),c(50)]
test_five_mape=((test_five_actual-test_five_pred)/test_five_actual)*100
summary(test_five_mape)
## value
## Min. :-13.0146
## 1st Qu.: -5.8903
## Median : -2.3951
## Mean : -2.9651
## 3rd Qu.: 0.1424
## Max. : 3.8983
boxplot(test_five_mape)
###-----------------------------------------------------------------------------------
###SIX
str(final_feature_set_six)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 26339 26451 24478 25801 27290 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_six<- final_feature_set_six[c(1:1513),c(2:50)]
str(train_six)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 26339 26451 24478 25801 27290 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_six <- final_feature_set_six[c(1514:1593),c(2:49)]
train_six_mat= as.matrix(train_six[,-49])
train_six_target= train_six[,49]
train_six_target_mat= as.matrix(train_six_target)
test_six_mat= as.matrix(test_six)
fit_cvglmnet_six=cv.glmnet(train_six_mat,train_six_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_six)
## List of 11
## $ lambda : num [1:100] 1788 1629 1484 1352 1232 ...
## $ cvm : num [1:100] 6225079 5626962 5029301 4528622 4095181 ...
## $ cvsd : num [1:100] 210057 205899 189211 175367 164450 ...
## $ cvup : num [1:100] 6435136 5832860 5218512 4703990 4259631 ...
## $ cvlo : num [1:100] 6015021 5421063 4840090 4353255 3930731 ...
## $ nzero : Named int [1:100] 0 2 2 2 3 4 4 4 4 4 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_six_mat, y = train_six_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 27326 25271 23161 21239 19549 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1885] 20 35 20 35 20 35 20 21 35 20 ...
## .. .. ..@ p : int [1:101] 0 0 2 4 6 9 13 17 21 25 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1885] 0.0517 0.0179 0.0901 0.0476 0.125 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 2 2 2 3 4 4 4 4 4 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 1788 1629 1484 1352 1232 ...
## ..$ dev.ratio: num [1:100] 0 0.102 0.198 0.277 0.347 ...
## ..$ nulldev : num 9.45e+09
## ..$ npasses : int 6933
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_six_mat, y = train_six_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.599
## $ lambda.1se: num 8.11
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_six,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.835433e+03
## lag_7days_0 .
## lag_7days_1 .
## lag_7days_10 .
## lag_7days_11 -9.561707e-02
## lag_7days_12 -6.154767e-02
## lag_7days_13 .
## lag_7days_14 1.060653e-01
## lag_7days_15 1.677564e-01
## lag_7days_16 4.369148e-05
## lag_7days_17 -2.705289e-02
## lag_7days_18 -3.919448e-02
## lag_7days_19 .
## lag_7days_2 -1.489517e-03
## lag_7days_20 -3.464413e-01
## lag_7days_21 -1.319299e-02
## lag_7days_22 -4.321707e-02
## lag_7days_23 .
## lag_7days_3 .
## lag_7days_4 -2.225581e-01
## lag_7days_5 -3.732420e-01
## lag_7days_6 4.807807e-01
## lag_7days_7 5.290468e-01
## lag_7days_8 .
## lag_7days_9 -2.941580e-02
## lag_2days_0 -6.016840e-01
## lag_2days_1 -1.644845e-01
## lag_2days_10 -1.341004e-01
## lag_2days_11 -2.148008e-01
## lag_2days_12 2.614267e-02
## lag_2days_13 7.635056e-01
## lag_2days_14 -3.818932e-01
## lag_2days_15 7.330516e-03
## lag_2days_16 5.025293e-02
## lag_2days_17 -3.205026e-03
## lag_2days_18 -1.902415e-02
## lag_2days_19 4.557771e-02
## lag_2days_2 -4.735428e-01
## lag_2days_20 8.331482e-02
## lag_2days_21 .
## lag_2days_22 2.893238e-01
## lag_2days_23 7.728822e-01
## lag_2days_3 .
## lag_2days_4 6.425331e-01
## lag_2days_5 2.764444e-01
## lag_2days_6 3.760022e-01
## lag_2days_7 1.734098e-01
## lag_2days_8 .
## lag_2days_9 -5.855843e-01
plot(fit_cvglmnet_six)
fit_cvglmnet_six
##
## Call: cv.glmnet(x = train_six_mat, y = train_six_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.599 852240 75251 37
## 1se 8.106 924143 78994 22
test_six_pred=predict(fit_cvglmnet_six, newx=test_six_mat,s=c("lambda.min"))
test_six_actual=final_feature_set_six[c(1514:1593),c(50)]
test_six_mape=((test_six_actual-test_six_pred)/test_six_actual)*100
summary(test_six_mape)
## value
## Min. :-15.940
## 1st Qu.: -8.132
## Median : -3.488
## Mean : -4.216
## 3rd Qu.: -0.305
## Max. : 4.662
boxplot(test_six_mape)
###-----------------------------------------------------------------------------------
###SEVEN
str(final_feature_set_seven)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 28086 26853 23813 27336 28629 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_seven<- final_feature_set_seven[c(1:1513),c(2:50)]
str(train_seven)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 28086 26853 23813 27336 28629 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_seven <- final_feature_set_seven[c(1514:1593),c(2:49)]
train_seven_mat= as.matrix(train_seven[,-49])
train_seven_target= train_seven[,49]
train_seven_target_mat= as.matrix(train_seven_target)
test_seven_mat= as.matrix(test_seven)
fit_cvglmnet_seven=cv.glmnet(train_seven_mat,train_seven_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_seven)
## List of 11
## $ lambda : num [1:100] 2224 2026 1846 1682 1533 ...
## $ cvm : num [1:100] 9096234 8299609 7602208 7023430 6540847 ...
## $ cvsd : num [1:100] 573876 563022 540954 521679 505363 ...
## $ cvup : num [1:100] 9670109 8862630 8143162 7545109 7046209 ...
## $ cvlo : num [1:100] 8522358 7736587 7061254 6501751 6035484 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 2 2 2 2 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_seven_mat, y = train_seven_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 28714 26841 25134 23578 22161 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1944] 21 21 21 21 21 35 21 35 21 35 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 6 8 10 12 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1944] 0.0653 0.1248 0.179 0.2284 0.2689 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 2 2 2 2 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2224 2026 1846 1682 1533 ...
## ..$ dev.ratio: num [1:100] 0 0.092 0.168 0.232 0.285 ...
## ..$ nulldev : num 1.38e+10
## ..$ npasses : int 8127
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_seven_mat, y = train_seven_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.818
## $ lambda.1se: num 9.19
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_seven,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 4.213720e+03
## lag_7days_0 1.553823e-03
## lag_7days_1 8.198602e-03
## lag_7days_10 -2.189853e-02
## lag_7days_11 -1.425290e-01
## lag_7days_12 -1.004035e-01
## lag_7days_13 .
## lag_7days_14 4.382695e-02
## lag_7days_15 2.705547e-01
## lag_7days_16 3.757848e-02
## lag_7days_17 6.809470e-03
## lag_7days_18 -8.112733e-02
## lag_7days_19 -3.448698e-02
## lag_7days_2 .
## lag_7days_20 -4.429411e-01
## lag_7days_21 -9.623756e-03
## lag_7days_22 -6.159337e-03
## lag_7days_23 1.173344e-02
## lag_7days_3 8.883386e-02
## lag_7days_4 -2.512868e-01
## lag_7days_5 -7.271737e-01
## lag_7days_6 1.134809e-01
## lag_7days_7 1.097336e+00
## lag_7days_8 7.462851e-02
## lag_7days_9 -3.795986e-03
## lag_2days_0 -6.320963e-01
## lag_2days_1 -2.029060e-01
## lag_2days_10 -3.000203e-02
## lag_2days_11 -2.909890e-01
## lag_2days_12 .
## lag_2days_13 1.139903e+00
## lag_2days_14 -6.148372e-01
## lag_2days_15 .
## lag_2days_16 3.164487e-05
## lag_2days_17 .
## lag_2days_18 2.809591e-02
## lag_2days_19 .
## lag_2days_2 -6.475067e-01
## lag_2days_20 2.135037e-01
## lag_2days_21 2.203852e-02
## lag_2days_22 1.528029e-01
## lag_2days_23 8.756139e-01
## lag_2days_3 .
## lag_2days_4 8.359278e-01
## lag_2days_5 4.690849e-01
## lag_2days_6 .
## lag_2days_7 4.638267e-01
## lag_2days_8 .
## lag_2days_9 -7.718690e-01
plot(fit_cvglmnet_seven)
fit_cvglmnet_seven
##
## Call: cv.glmnet(x = train_seven_mat, y = train_seven_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.818 1402017 148754 39
## 1se 9.188 1545114 170621 23
test_seven_pred=predict(fit_cvglmnet_seven, newx=test_seven_mat,s=c("lambda.min"))
test_seven_actual=final_feature_set_seven[c(1514:1593),c(50)]
test_seven_mape=((test_seven_actual-test_seven_pred)/test_seven_actual)*100
summary(test_seven_mape)
## value
## Min. :-21.7672
## 1st Qu.:-11.1678
## Median : -5.2400
## Mean : -6.0696
## 3rd Qu.: -0.5244
## Max. : 4.9691
boxplot(test_seven_mape)
###-----------------------------------------------------------------------------------
###EIGHT
str(final_feature_set_eight)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 32702 30627 24677 32683 32817 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_eight<- final_feature_set_eight[c(1:1513),c(2:50)]
str(train_eight)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 32702 30627 24677 32683 32817 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_eight <- final_feature_set_eight[c(1514:1593),c(2:49)]
train_eight_mat= as.matrix(train_eight[,-49])
train_eight_target= train_eight[,49]
train_eight_target_mat= as.matrix(train_eight_target)
test_eight_mat= as.matrix(test_eight)
fit_cvglmnet_eight=cv.glmnet(train_eight_mat,train_eight_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_eight)
## List of 11
## $ lambda : num [1:100] 3018 2750 2506 2283 2080 ...
## $ cvm : num [1:100] 16815333 15368643 14081698 13013477 12126826 ...
## $ cvsd : num [1:100] 682409 655942 604892 562477 527565 ...
## $ cvup : num [1:100] 17497741 16024585 14686590 13575954 12654391 ...
## $ cvlo : num [1:100] 16132924 14712702 13476805 12451000 11599261 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_eight_mat, y = train_eight_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 32360 30252 28331 26581 24987 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1851] 22 22 22 22 22 22 22 22 22 22 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 6 7 8 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1851] 0.0652 0.1246 0.1787 0.228 0.2729 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 1 1 1 1 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3018 2750 2506 2283 2080 ...
## ..$ dev.ratio: num [1:100] 0 0.0917 0.1679 0.2311 0.2836 ...
## ..$ nulldev : num 2.55e+10
## ..$ npasses : int 9922
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_eight_mat, y = train_eight_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 1.47
## $ lambda.1se: num 11.4
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_eight,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 7.330748e+03
## lag_7days_0 1.267976e-01
## lag_7days_1 7.857544e-02
## lag_7days_10 .
## lag_7days_11 -2.159409e-01
## lag_7days_12 -1.437557e-01
## lag_7days_13 4.029037e-05
## lag_7days_14 9.058222e-02
## lag_7days_15 2.007918e-01
## lag_7days_16 1.804249e-01
## lag_7days_17 1.103982e-01
## lag_7days_18 -2.972658e-01
## lag_7days_19 -5.113459e-02
## lag_7days_2 .
## lag_7days_20 -5.570015e-01
## lag_7days_21 .
## lag_7days_22 .
## lag_7days_23 1.215561e-02
## lag_7days_3 1.356579e-01
## lag_7days_4 -3.099059e-01
## lag_7days_5 -1.275703e+00
## lag_7days_6 3.818896e-03
## lag_7days_7 9.520848e-01
## lag_7days_8 6.452427e-01
## lag_7days_9 .
## lag_2days_0 -1.150626e+00
## lag_2days_1 -1.143124e-01
## lag_2days_10 .
## lag_2days_11 -5.907724e-01
## lag_2days_12 -3.144197e-02
## lag_2days_13 1.966120e+00
## lag_2days_14 -1.087101e+00
## lag_2days_15 .
## lag_2days_16 .
## lag_2days_17 -7.555661e-03
## lag_2days_18 2.366089e-02
## lag_2days_19 .
## lag_2days_2 -9.256850e-01
## lag_2days_20 3.743133e-01
## lag_2days_21 1.661830e-02
## lag_2days_22 .
## lag_2days_23 1.137326e+00
## lag_2days_3 .
## lag_2days_4 1.259871e+00
## lag_2days_5 8.986738e-01
## lag_2days_6 .
## lag_2days_7 3.310840e-01
## lag_2days_8 .
## lag_2days_9 -8.377912e-01
plot(fit_cvglmnet_eight)
fit_cvglmnet_eight
##
## Call: cv.glmnet(x = train_eight_mat, y = train_eight_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 1.468 3042393 256439 35
## 1se 11.362 3282852 263436 21
test_eight_pred=predict(fit_cvglmnet_eight, newx=test_eight_mat,s=c("lambda.min"))
test_eight_actual=final_feature_set_eight[c(1514:1593),c(50)]
test_eight_mape=((test_eight_actual-test_eight_pred)/test_eight_actual)*100
summary(test_eight_mape)
## value
## Min. :-37.0460
## 1st Qu.:-14.4402
## Median : -6.0033
## Mean : -8.4192
## 3rd Qu.: -0.2982
## Max. : 6.4501
boxplot(test_eight_mape)
###-----------------------------------------------------------------------------------
###NINE
str(final_feature_set_nine)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 35788 33468 26693 35711 34991 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_nine<- final_feature_set_nine[c(1:1513),c(2:50)]
str(train_nine)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 35788 33468 26693 35711 34991 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_nine <- final_feature_set_nine[c(1514:1593),c(2:49)]
train_nine_mat= as.matrix(train_nine[,-49])
train_nine_target= train_nine[,49]
train_nine_target_mat= as.matrix(train_nine_target)
test_nine_mat= as.matrix(test_nine)
fit_cvglmnet_nine=cv.glmnet(train_nine_mat,train_nine_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_nine)
## List of 11
## $ lambda : num [1:100] 3334 3038 2768 2522 2298 ...
## $ cvm : num [1:100] 20868332 19143124 17573544 16270496 15188723 ...
## $ cvsd : num [1:100] 933904 889345 829813 784326 751259 ...
## $ cvup : num [1:100] 21802236 20032469 18403357 17054822 15939982 ...
## $ cvlo : num [1:100] 19934428 18253779 16743731 15486170 14437464 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_nine_mat, y = train_nine_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 34649 32411 30372 28514 26822 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1821] 23 23 23 23 23 23 23 23 23 23 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 6 7 8 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1821] 0.0646 0.1235 0.1771 0.226 0.2706 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 1 1 1 1 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3334 3038 2768 2522 2298 ...
## ..$ dev.ratio: num [1:100] 0 0.0901 0.1649 0.227 0.2786 ...
## ..$ nulldev : num 3.17e+10
## ..$ npasses : int 10439
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_nine_mat, y = train_nine_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 1.02
## $ lambda.1se: num 15.1
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_nine,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.718633e+03
## lag_7days_0 2.086867e-01
## lag_7days_1 9.032731e-02
## lag_7days_10 3.690016e-05
## lag_7days_11 -1.040195e-01
## lag_7days_12 -1.417918e-01
## lag_7days_13 6.295169e-02
## lag_7days_14 8.693410e-03
## lag_7days_15 1.646476e-01
## lag_7days_16 1.925828e-01
## lag_7days_17 2.155329e-01
## lag_7days_18 -4.868884e-01
## lag_7days_19 -1.397980e-01
## lag_7days_2 .
## lag_7days_20 -5.513717e-01
## lag_7days_21 3.565167e-05
## lag_7days_22 -2.839221e-04
## lag_7days_23 1.054419e-01
## lag_7days_3 3.028586e-01
## lag_7days_4 -3.516612e-01
## lag_7days_5 -1.597656e+00
## lag_7days_6 .
## lag_7days_7 8.883695e-01
## lag_7days_8 6.378832e-01
## lag_7days_9 9.518961e-02
## lag_2days_0 -1.200900e+00
## lag_2days_1 -1.885816e-01
## lag_2days_10 .
## lag_2days_11 -6.480952e-01
## lag_2days_12 -2.360297e-03
## lag_2days_13 2.455591e+00
## lag_2days_14 -1.376235e+00
## lag_2days_15 .
## lag_2days_16 .
## lag_2days_17 -4.586285e-02
## lag_2days_18 1.739779e-01
## lag_2days_19 .
## lag_2days_2 -1.196516e+00
## lag_2days_20 3.051719e-01
## lag_2days_21 .
## lag_2days_22 -1.226181e-01
## lag_2days_23 1.283074e+00
## lag_2days_3 .
## lag_2days_4 1.442846e+00
## lag_2days_5 1.120653e+00
## lag_2days_6 .
## lag_2days_7 3.926476e-01
## lag_2days_8 .
## lag_2days_9 -1.014835e+00
plot(fit_cvglmnet_nine)
fit_cvglmnet_nine
##
## Call: cv.glmnet(x = train_nine_mat, y = train_nine_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 1.018 3919278 464555 38
## 1se 15.118 4318617 527353 23
test_nine_pred=predict(fit_cvglmnet_nine, newx=test_nine_mat,s=c("lambda.min"))
test_nine_actual=final_feature_set_nine[c(1514:1593),c(50)]
test_nine_mape=((test_nine_actual-test_nine_pred)/test_nine_actual)*100
summary(test_nine_mape)
## value
## Min. :-44.69365
## 1st Qu.:-15.42506
## Median : -6.59472
## Mean : -9.29374
## 3rd Qu.: 0.06868
## Max. : 9.84154
boxplot(test_nine_mape)
###-----------------------------------------------------------------------------------
###TEN
str(final_feature_set_ten)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36843 34793 28136 36707 35653 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_ten<- final_feature_set_ten[c(1:1513),c(2:50)]
str(train_ten)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36843 34793 28136 36707 35653 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_ten <- final_feature_set_ten[c(1514:1593),c(2:49)]
train_ten_mat= as.matrix(train_ten[,-49])
train_ten_target= train_ten[,49]
train_ten_target_mat= as.matrix(train_ten_target)
test_ten_mat= as.matrix(test_ten)
fit_cvglmnet_ten=cv.glmnet(train_ten_mat,train_ten_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_ten)
## List of 11
## $ lambda : num [1:100] 3199 2915 2656 2420 2205 ...
## $ cvm : num [1:100] 20224000 18585812 17142425 15944073 14948502 ...
## $ cvsd : num [1:100] 693491 676896 651883 636921 629634 ...
## $ cvup : num [1:100] 20917491 19262708 17794308 16580994 15578137 ...
## $ cvlo : num [1:100] 19530509 17908916 16490542 15307152 14318868 ...
## $ nzero : Named int [1:100] 0 1 1 2 2 2 2 2 3 2 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_ten_mat, y = train_ten_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35427 33279 31323 29532 27869 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1845] 23 23 2 23 2 23 2 23 2 23 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 4 6 8 10 12 15 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1845] 0.062 0.119 0.005 0.165 0.029 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 2 2 2 2 2 3 2 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3199 2915 2656 2420 2205 ...
## ..$ dev.ratio: num [1:100] 0 0.0857 0.1569 0.216 0.2652 ...
## ..$ nulldev : num 3.07e+10
## ..$ npasses : int 10730
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_ten_mat, y = train_ten_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 2.06
## $ lambda.1se: num 17.5
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_ten,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9.220968e+03
## lag_7days_0 1.824474e-01
## lag_7days_1 1.713680e-01
## lag_7days_10 2.623220e-03
## lag_7days_11 .
## lag_7days_12 -4.579813e-02
## lag_7days_13 3.904312e-02
## lag_7days_14 .
## lag_7days_15 1.740142e-01
## lag_7days_16 1.151886e-01
## lag_7days_17 1.805163e-01
## lag_7days_18 -3.747651e-01
## lag_7days_19 -2.626780e-01
## lag_7days_2 .
## lag_7days_20 -4.303769e-01
## lag_7days_21 .
## lag_7days_22 .
## lag_7days_23 9.480647e-02
## lag_7days_3 7.766754e-02
## lag_7days_4 -1.828961e-01
## lag_7days_5 -1.430453e+00
## lag_7days_6 -4.078316e-02
## lag_7days_7 7.542418e-01
## lag_7days_8 5.303225e-01
## lag_7days_9 6.677903e-02
## lag_2days_0 -1.120336e+00
## lag_2days_1 -7.911109e-02
## lag_2days_10 .
## lag_2days_11 -4.039030e-01
## lag_2days_12 .
## lag_2days_13 2.391040e+00
## lag_2days_14 -1.323252e+00
## lag_2days_15 .
## lag_2days_16 .
## lag_2days_17 .
## lag_2days_18 1.453638e-01
## lag_2days_19 .
## lag_2days_2 -1.233587e+00
## lag_2days_20 1.260627e-01
## lag_2days_21 .
## lag_2days_22 -1.440705e-02
## lag_2days_23 1.242179e+00
## lag_2days_3 .
## lag_2days_4 1.272659e+00
## lag_2days_5 1.071504e+00
## lag_2days_6 .
## lag_2days_7 4.898339e-01
## lag_2days_8 -9.207114e-03
## lag_2days_9 -1.216740e+00
plot(fit_cvglmnet_ten)
fit_cvglmnet_ten
##
## Call: cv.glmnet(x = train_ten_mat, y = train_ten_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 2.057 4075445 470846 34
## 1se 17.476 4534800 499786 22
test_ten_pred=predict(fit_cvglmnet_ten, newx=test_ten_mat,s=c("lambda.min"))
test_ten_actual=final_feature_set_ten[c(1514:1593),c(50)]
test_ten_mape=((test_ten_actual-test_ten_pred)/test_ten_actual)*100
summary(test_ten_mape)
## value
## Min. :-44.6420
## 1st Qu.:-15.8791
## Median : -5.8754
## Mean : -8.9503
## 3rd Qu.: -0.3455
## Max. : 10.5915
boxplot(test_ten_mape)
###-----------------------------------------------------------------------------------
###ELEVEN
str(final_feature_set_eleven)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 37140 35383 29059 37289 36012 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_eleven<- final_feature_set_eleven[c(1:1513),c(2:50)]
str(train_eleven)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 37140 35383 29059 37289 36012 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_eleven <- final_feature_set_eleven[c(1514:1593),c(2:49)]
train_eleven_mat= as.matrix(train_eleven[,-49])
train_eleven_target= train_eleven[,49]
train_eleven_target_mat= as.matrix(train_eleven_target)
test_eleven_mat= as.matrix(test_eleven)
fit_cvglmnet_eleven=cv.glmnet(train_eleven_mat,train_eleven_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_eleven)
## List of 11
## $ lambda : num [1:100] 3144 2865 2611 2379 2167 ...
## $ cvm : num [1:100] 20263360 18717311 17318987 16158104 15194346 ...
## $ cvsd : num [1:100] 827443 814196 794982 784404 780514 ...
## $ cvup : num [1:100] 21090803 19531507 18113968 16942508 15974860 ...
## $ cvlo : num [1:100] 19435917 17903115 16524005 15373699 14413832 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 1 2 3 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_eleven_mat, y = train_eleven_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35966 33772 31773 29952 28292 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1913] 2 2 2 2 2 2 2 29 2 23 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 6 8 11 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1913] 0.0619 0.1184 0.1698 0.2167 0.2594 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 1 2 3 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3144 2865 2611 2379 2167 ...
## ..$ dev.ratio: num [1:100] 0 0.0826 0.1512 0.2082 0.2555 ...
## ..$ nulldev : num 3.07e+10
## ..$ npasses : int 10246
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_eleven_mat, y = train_eleven_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 2.22
## $ lambda.1se: num 15.6
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_eleven,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9.777994e+03
## lag_7days_0 2.207053e-01
## lag_7days_1 1.637093e-01
## lag_7days_10 2.063553e-02
## lag_7days_11 .
## lag_7days_12 .
## lag_7days_13 1.451375e-02
## lag_7days_14 .
## lag_7days_15 2.263492e-01
## lag_7days_16 8.297126e-02
## lag_7days_17 2.040503e-01
## lag_7days_18 -4.340645e-01
## lag_7days_19 -2.796411e-01
## lag_7days_2 .
## lag_7days_20 -3.572778e-01
## lag_7days_21 .
## lag_7days_22 .
## lag_7days_23 1.098611e-01
## lag_7days_3 7.481197e-02
## lag_7days_4 -1.577888e-01
## lag_7days_5 -1.404238e+00
## lag_7days_6 -1.091088e-01
## lag_7days_7 7.070999e-01
## lag_7days_8 5.299376e-01
## lag_7days_9 7.538817e-03
## lag_2days_0 -1.062526e+00
## lag_2days_1 -1.788577e-01
## lag_2days_10 .
## lag_2days_11 -3.441289e-01
## lag_2days_12 .
## lag_2days_13 2.394805e+00
## lag_2days_14 -1.286204e+00
## lag_2days_15 .
## lag_2days_16 .
## lag_2days_17 5.734633e-03
## lag_2days_18 1.711865e-01
## lag_2days_19 .
## lag_2days_2 -1.191657e+00
## lag_2days_20 3.042636e-02
## lag_2days_21 .
## lag_2days_22 -1.848652e-02
## lag_2days_23 1.261201e+00
## lag_2days_3 .
## lag_2days_4 1.240868e+00
## lag_2days_5 1.082947e+00
## lag_2days_6 .
## lag_2days_7 5.688578e-01
## lag_2days_8 -9.827586e-02
## lag_2days_9 -1.245420e+00
plot(fit_cvglmnet_eleven)
fit_cvglmnet_eleven
##
## Call: cv.glmnet(x = train_eleven_mat, y = train_eleven_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 2.218 4313820 484142 34
## 1se 15.650 4741524 556873 21
test_eleven_pred=predict(fit_cvglmnet_eleven, newx=test_eleven_mat,s=c("lambda.min"))
test_eleven_actual=final_feature_set_eleven[c(1514:1593),c(50)]
test_eleven_mape=((test_eleven_actual-test_eleven_pred)/test_eleven_actual)*100
summary(test_eleven_mape)
## value
## Min. :-42.6864
## 1st Qu.:-14.6099
## Median : -5.5406
## Mean : -8.1482
## 3rd Qu.: 0.3266
## Max. : 11.5066
boxplot(test_eleven_mape)
###-----------------------------------------------------------------------------------
###TWELVE
str(final_feature_set_twelve)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 34623 34131 28947 35723 34256 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_twelve<- final_feature_set_twelve[c(1:1513),c(2:50)]
str(train_twelve)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 34623 34131 28947 35723 34256 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_twelve <- final_feature_set_twelve[c(1514:1593),c(2:49)]
train_twelve_mat= as.matrix(train_twelve[,-49])
train_twelve_target= train_twelve[,49]
train_twelve_target_mat= as.matrix(train_twelve_target)
test_twelve_mat= as.matrix(test_twelve)
fit_cvglmnet_twelve=cv.glmnet(train_twelve_mat,train_twelve_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_twelve)
## List of 11
## $ lambda : num [1:100] 2912 2654 2418 2203 2007 ...
## $ cvm : num [1:100] 17705476 16334562 15137781 14144309 13319593 ...
## $ cvsd : num [1:100] 493427 477483 467512 465051 468224 ...
## $ cvup : num [1:100] 18198903 16812045 15605293 14609360 13787817 ...
## $ cvlo : num [1:100] 17212049 15857079 14670269 13679258 12851368 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 2 3 3 4 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_twelve_mat, y = train_twelve_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 34717 32587 30646 28878 27266 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1990] 4 4 4 4 4 4 29 2 4 29 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 7 10 13 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1990] 0.0614 0.1173 0.1682 0.2146 0.2569 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 2 3 3 4 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2912 2654 2418 2203 2007 ...
## ..$ dev.ratio: num [1:100] 0 0.0811 0.1485 0.2044 0.2509 ...
## ..$ nulldev : num 2.68e+10
## ..$ npasses : int 10838
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_twelve_mat, y = train_twelve_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.976
## $ lambda.1se: num 13.2
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_twelve,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.805817e+03
## lag_7days_0 2.244550e-01
## lag_7days_1 2.449434e-01
## lag_7days_10 1.740376e-04
## lag_7days_11 -1.849618e-01
## lag_7days_12 1.340486e-01
## lag_7days_13 1.687491e-01
## lag_7days_14 .
## lag_7days_15 1.466460e-01
## lag_7days_16 5.158811e-02
## lag_7days_17 2.629476e-01
## lag_7days_18 -6.185353e-01
## lag_7days_19 -2.084700e-01
## lag_7days_2 .
## lag_7days_20 -3.042084e-01
## lag_7days_21 1.132003e-01
## lag_7days_22 -4.938385e-02
## lag_7days_23 1.641163e-01
## lag_7days_3 2.936851e-01
## lag_7days_4 -4.028527e-01
## lag_7days_5 -1.270304e+00
## lag_7days_6 -2.208762e-01
## lag_7days_7 6.785973e-01
## lag_7days_8 4.436480e-01
## lag_7days_9 1.721609e-02
## lag_2days_0 -8.630329e-01
## lag_2days_1 -1.233591e-01
## lag_2days_10 .
## lag_2days_11 -3.517745e-01
## lag_2days_12 1.308840e-01
## lag_2days_13 2.228544e+00
## lag_2days_14 -1.267696e+00
## lag_2days_15 2.785550e-05
## lag_2days_16 .
## lag_2days_17 8.357874e-02
## lag_2days_18 2.551994e-01
## lag_2days_19 .
## lag_2days_2 -1.302060e+00
## lag_2days_20 -1.500738e-02
## lag_2days_21 .
## lag_2days_22 -4.076703e-01
## lag_2days_23 1.443992e+00
## lag_2days_3 -9.239978e-02
## lag_2days_4 1.338694e+00
## lag_2days_5 9.433346e-01
## lag_2days_6 -8.061766e-02
## lag_2days_7 7.339311e-01
## lag_2days_8 -2.767478e-01
## lag_2days_9 -1.126473e+00
plot(fit_cvglmnet_twelve)
fit_cvglmnet_twelve
##
## Call: cv.glmnet(x = train_twelve_mat, y = train_twelve_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.976 3776168 365022 42
## 1se 13.207 4099879 393666 22
test_twelve_pred=predict(fit_cvglmnet_twelve, newx=test_twelve_mat,s=c("lambda.min"))
test_twelve_actual=final_feature_set_twelve[c(1514:1593),c(50)]
test_twelve_mape=((test_twelve_actual-test_twelve_pred)/test_twelve_actual)*100
summary(test_twelve_mape)
## value
## Min. :-36.168
## 1st Qu.:-13.204
## Median : -4.679
## Mean : -6.957
## 3rd Qu.: 1.483
## Max. : 14.295
boxplot(test_twelve_mape)
###-----------------------------------------------------------------------------------
###THIRTEEN
str(final_feature_set_thirteen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36135 33828 28945 36053 34651 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_thirteen<- final_feature_set_thirteen[c(1:1513),c(2:50)]
str(train_thirteen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36135 33828 28945 36053 34651 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_thirteen <- final_feature_set_thirteen[c(1514:1593),c(2:49)]
train_thirteen_mat= as.matrix(train_thirteen[,-49])
train_thirteen_target= train_thirteen[,49]
train_thirteen_target_mat= as.matrix(train_thirteen_target)
test_thirteen_mat= as.matrix(test_thirteen)
fit_cvglmnet_thirteen=cv.glmnet(train_thirteen_mat,train_thirteen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_thirteen)
## List of 11
## $ lambda : num [1:100] 2913 2654 2419 2204 2008 ...
## $ cvm : num [1:100] 18368861 17108394 15908292 14911984 14084867 ...
## $ cvsd : num [1:100] 762704 696536 599647 520603 459505 ...
## $ cvup : num [1:100] 19131565 17804930 16507939 15432587 14544372 ...
## $ cvlo : num [1:100] 17606156 16411858 15308645 14391381 13625361 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 2 3 3 4 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_thirteen_mat, y = train_thirteen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35038 32928 31005 29253 27656 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1970] 5 5 5 5 5 5 29 5 29 40 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 7 10 13 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1970] 0.0602 0.1151 0.1651 0.2107 0.2522 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 2 3 3 4 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2913 2654 2419 2204 2008 ...
## ..$ dev.ratio: num [1:100] 0 0.0782 0.1431 0.197 0.2417 ...
## ..$ nulldev : num 2.79e+10
## ..$ npasses : int 10076
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_thirteen_mat, y = train_thirteen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 1.18
## $ lambda.1se: num 11
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_thirteen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9.364538e+03
## lag_7days_0 2.146186e-01
## lag_7days_1 2.486145e-01
## lag_7days_10 2.690966e-02
## lag_7days_11 -5.385322e-01
## lag_7days_12 -3.027279e-01
## lag_7days_13 9.507385e-01
## lag_7days_14 .
## lag_7days_15 1.278362e-02
## lag_7days_16 4.587822e-02
## lag_7days_17 2.939627e-01
## lag_7days_18 -6.232381e-01
## lag_7days_19 -2.343787e-01
## lag_7days_2 .
## lag_7days_20 -2.434069e-01
## lag_7days_21 7.592717e-02
## lag_7days_22 -2.881550e-03
## lag_7days_23 1.507673e-01
## lag_7days_3 3.621211e-01
## lag_7days_4 -3.962525e-01
## lag_7days_5 -1.388743e+00
## lag_7days_6 -2.188510e-01
## lag_7days_7 5.997653e-01
## lag_7days_8 6.410823e-01
## lag_7days_9 4.164781e-03
## lag_2days_0 -9.915419e-01
## lag_2days_1 -9.311769e-02
## lag_2days_10 .
## lag_2days_11 -2.596666e-01
## lag_2days_12 3.341427e-02
## lag_2days_13 2.243416e+00
## lag_2days_14 -1.216514e+00
## lag_2days_15 7.258157e-05
## lag_2days_16 .
## lag_2days_17 6.133742e-02
## lag_2days_18 3.263296e-01
## lag_2days_19 -5.499797e-02
## lag_2days_2 -1.374669e+00
## lag_2days_20 -2.467977e-02
## lag_2days_21 .
## lag_2days_22 -4.202671e-01
## lag_2days_23 1.509168e+00
## lag_2days_3 -8.443631e-02
## lag_2days_4 1.447616e+00
## lag_2days_5 1.056320e+00
## lag_2days_6 -1.118069e-01
## lag_2days_7 6.812869e-01
## lag_2days_8 -2.848379e-01
## lag_2days_9 -1.186262e+00
plot(fit_cvglmnet_thirteen)
fit_cvglmnet_thirteen
##
## Call: cv.glmnet(x = train_thirteen_mat, y = train_thirteen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 1.176 4188101 379616 43
## 1se 10.968 4527091 400749 21
test_thirteen_pred=predict(fit_cvglmnet_thirteen, newx=test_thirteen_mat,s=c("lambda.min"))
test_thirteen_actual=final_feature_set_thirteen[c(1514:1593),c(50)]
test_thirteen_mape=((test_thirteen_actual-test_thirteen_pred)/test_thirteen_actual)*100
summary(test_thirteen_mape)
## value
## Min. :-36.784
## 1st Qu.:-13.726
## Median : -4.893
## Mean : -7.064
## 3rd Qu.: 1.378
## Max. : 14.963
boxplot(test_thirteen_mape)
###-----------------------------------------------------------------------------------
###FOURTEEN
str(final_feature_set_fourteen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36556 33525 29090 36383 34934 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_fourteen<- final_feature_set_fourteen[c(1:1513),c(2:50)]
str(train_fourteen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36556 33525 29090 36383 34934 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_fourteen <- final_feature_set_fourteen[c(1514:1593),c(2:49)]
train_fourteen_mat= as.matrix(train_fourteen[,-49])
train_fourteen_target= train_fourteen[,49]
train_fourteen_target_mat= as.matrix(train_fourteen_target)
test_fourteen_mat= as.matrix(test_fourteen)
fit_cvglmnet_fourteen=cv.glmnet(train_fourteen_mat,train_fourteen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_fourteen)
## List of 11
## $ lambda : num [1:100] 3112 2835 2584 2354 2145 ...
## $ cvm : num [1:100] 20653084 19128093 17757943 16621603 15678441 ...
## $ cvsd : num [1:100] 643962 618181 605207 603412 609392 ...
## $ cvup : num [1:100] 21297046 19746273 18363150 17225015 16287832 ...
## $ cvlo : num [1:100] 20009123 18509912 17152736 16018191 15069049 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 4 5 5 5 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_fourteen_mat, y = train_fourteen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35695 33527 31551 29751 28111 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:2071] 7 7 7 7 7 7 8 39 40 7 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 9 14 19 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:2071] 0.0609 0.1165 0.1671 0.2132 0.2552 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 4 5 5 5 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3112 2835 2584 2354 2145 ...
## ..$ dev.ratio: num [1:100] 0 0.0794 0.1453 0.2 0.2454 ...
## ..$ nulldev : num 3.13e+10
## ..$ npasses : int 11180
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_fourteen_mat, y = train_fourteen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.495
## $ lambda.1se: num 12.9
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_fourteen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.038728e+04
## lag_7days_0 2.903669e-01
## lag_7days_1 2.469218e-01
## lag_7days_10 1.463136e-01
## lag_7days_11 -5.742013e-01
## lag_7days_12 -2.568519e-01
## lag_7days_13 2.378743e-01
## lag_7days_14 2.493112e-01
## lag_7days_15 3.522595e-01
## lag_7days_16 1.602154e-01
## lag_7days_17 3.268497e-01
## lag_7days_18 -8.469623e-01
## lag_7days_19 -1.980426e-01
## lag_7days_2 -5.355320e-02
## lag_7days_20 -3.400111e-01
## lag_7days_21 2.626478e-01
## lag_7days_22 -1.823369e-01
## lag_7days_23 2.565456e-01
## lag_7days_3 5.740245e-01
## lag_7days_4 -7.044449e-01
## lag_7days_5 -1.443639e+00
## lag_7days_6 -1.769318e-01
## lag_7days_7 6.514149e-01
## lag_7days_8 6.083293e-01
## lag_7days_9 1.377554e-02
## lag_2days_0 -1.225237e+00
## lag_2days_1 -1.233257e-01
## lag_2days_10 2.195578e-03
## lag_2days_11 -4.050088e-01
## lag_2days_12 7.075404e-02
## lag_2days_13 2.368469e+00
## lag_2days_14 -1.398714e+00
## lag_2days_15 1.343056e-01
## lag_2days_16 -1.221919e-03
## lag_2days_17 -1.884288e-04
## lag_2days_18 4.742606e-01
## lag_2days_19 -7.958247e-02
## lag_2days_2 -1.496203e+00
## lag_2days_20 -8.279890e-02
## lag_2days_21 -1.622895e-02
## lag_2days_22 -4.679129e-01
## lag_2days_23 1.651701e+00
## lag_2days_3 -1.556087e-01
## lag_2days_4 1.899370e+00
## lag_2days_5 1.216334e+00
## lag_2days_6 -2.383846e-01
## lag_2days_7 6.941956e-01
## lag_2days_8 -2.701915e-01
## lag_2days_9 -1.197536e+00
plot(fit_cvglmnet_fourteen)
fit_cvglmnet_fourteen
##
## Call: cv.glmnet(x = train_fourteen_mat, y = train_fourteen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.495 4704692 405633 48
## 1se 12.858 5075639 500022 20
test_fourteen_pred=predict(fit_cvglmnet_fourteen, newx=test_fourteen_mat,s=c("lambda.min"))
test_fourteen_actual=final_feature_set_fourteen[c(1514:1593),c(50)]
test_fourteen_mape=((test_fourteen_actual-test_fourteen_pred)/test_fourteen_actual)*100
summary(test_fourteen_mape)
## value
## Min. :-40.177
## 1st Qu.:-13.725
## Median : -4.824
## Mean : -7.002
## 3rd Qu.: 1.293
## Max. : 13.971
boxplot(test_fourteen_mape)
###-----------------------------------------------------------------------------------
###FIFTEEN
str(final_feature_set_fifteen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36190 32951 29448 36013 34785 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_fifteen<- final_feature_set_fifteen[c(1:1513),c(2:50)]
str(train_fifteen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 36190 32951 29448 36013 34785 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_fifteen <- final_feature_set_fifteen[c(1514:1593),c(2:49)]
train_fifteen_mat= as.matrix(train_fifteen[,-49])
train_fifteen_target= train_fifteen[,49]
train_fifteen_target_mat= as.matrix(train_fifteen_target)
test_fifteen_mat= as.matrix(test_fifteen)
fit_cvglmnet_fifteen=cv.glmnet(train_fifteen_mat,train_fifteen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_fifteen)
## List of 11
## $ lambda : num [1:100] 3132 2854 2600 2369 2159 ...
## $ cvm : num [1:100] 20547050 18949713 17562199 16411416 15456213 ...
## $ cvsd : num [1:100] 605646 565353 533779 512617 499488 ...
## $ cvup : num [1:100] 21152696 19515066 18095978 16924032 15955701 ...
## $ cvlo : num [1:100] 19941404 18384360 17028421 15898799 14956725 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 3 4 4 4 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_fifteen_mat, y = train_fifteen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35577 33350 31321 29472 27787 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1982] 8 8 8 8 8 8 39 40 7 8 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 8 12 16 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1982] 0.0623 0.1191 0.1708 0.218 0.2609 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 3 4 4 4 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3132 2854 2600 2369 2159 ...
## ..$ dev.ratio: num [1:100] 0 0.0811 0.1484 0.2043 0.2507 ...
## ..$ nulldev : num 3.11e+10
## ..$ npasses : int 11642
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_fifteen_mat, y = train_fifteen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.724
## $ lambda.1se: num 8.92
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_fifteen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.000321e+04
## lag_7days_0 2.004299e-01
## lag_7days_1 2.811767e-01
## lag_7days_10 6.782427e-02
## lag_7days_11 -6.058612e-01
## lag_7days_12 -1.398031e-01
## lag_7days_13 9.529670e-02
## lag_7days_14 1.638045e-01
## lag_7days_15 4.232302e-01
## lag_7days_16 2.697513e-01
## lag_7days_17 3.544868e-01
## lag_7days_18 -7.815686e-01
## lag_7days_19 -2.521243e-01
## lag_7days_2 -2.304720e-02
## lag_7days_20 -3.036687e-01
## lag_7days_21 2.378384e-01
## lag_7days_22 -1.856611e-01
## lag_7days_23 2.474251e-01
## lag_7days_3 6.580167e-01
## lag_7days_4 -7.775484e-01
## lag_7days_5 -1.429664e+00
## lag_7days_6 -2.262121e-01
## lag_7days_7 7.203553e-01
## lag_7days_8 6.206672e-01
## lag_7days_9 9.332906e-04
## lag_2days_0 -1.198680e+00
## lag_2days_1 -1.320550e-01
## lag_2days_10 2.737211e-02
## lag_2days_11 -3.398427e-01
## lag_2days_12 6.103432e-03
## lag_2days_13 2.224016e+00
## lag_2days_14 -1.351285e+00
## lag_2days_15 1.832800e-01
## lag_2days_16 .
## lag_2days_17 .
## lag_2days_18 4.249650e-01
## lag_2days_19 -3.248916e-02
## lag_2days_2 -1.489861e+00
## lag_2days_20 -2.642511e-02
## lag_2days_21 -1.264022e-02
## lag_2days_22 -4.626310e-01
## lag_2days_23 1.606975e+00
## lag_2days_3 -7.580925e-02
## lag_2days_4 1.868205e+00
## lag_2days_5 1.161067e+00
## lag_2days_6 -2.236911e-01
## lag_2days_7 6.240665e-01
## lag_2days_8 -2.417576e-01
## lag_2days_9 -1.201612e+00
plot(fit_cvglmnet_fifteen)
fit_cvglmnet_fifteen
##
## Call: cv.glmnet(x = train_fifteen_mat, y = train_fifteen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.724 4572174 286408 46
## 1se 8.920 4834449 307949 23
test_fifteen_pred=predict(fit_cvglmnet_fifteen, newx=test_fifteen_mat,s=c("lambda.min"))
test_fifteen_actual=final_feature_set_fifteen[c(1514:1593),c(50)]
test_fifteen_mape=((test_fifteen_actual-test_fifteen_pred)/test_fifteen_actual)*100
summary(test_fifteen_mape)
## value
## Min. :-37.570
## 1st Qu.:-13.180
## Median : -5.106
## Mean : -6.855
## 3rd Qu.: 0.470
## Max. : 13.201
boxplot(test_fifteen_mape)
###-----------------------------------------------------------------------------------
###SIXTEEN
str(final_feature_set_sixteen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 37032 33656 30569 36849 35690 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_sixteen<- final_feature_set_sixteen[c(1:1513),c(2:50)]
str(train_sixteen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 37032 33656 30569 36849 35690 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_sixteen <- final_feature_set_sixteen[c(1514:1593),c(2:49)]
train_sixteen_mat= as.matrix(train_sixteen[,-49])
train_sixteen_target= train_sixteen[,49]
train_sixteen_target_mat= as.matrix(train_sixteen_target)
test_sixteen_mat= as.matrix(test_sixteen)
fit_cvglmnet_sixteen=cv.glmnet(train_sixteen_mat,train_sixteen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_sixteen)
## List of 11
## $ lambda : num [1:100] 3123 2845 2592 2362 2152 ...
## $ cvm : num [1:100] 19848643 18313790 16934866 15790243 14840097 ...
## $ cvsd : num [1:100] 990439 953556 883223 825220 777700 ...
## $ cvup : num [1:100] 20839083 19267346 17818088 16615463 15617797 ...
## $ cvlo : num [1:100] 18858204 17360234 16051643 14965023 14062397 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 2 2 3 4 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_sixteen_mat, y = train_sixteen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35735 33514 31491 29648 27968 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1982] 8 8 8 8 8 8 40 8 40 8 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 7 9 12 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1982] 0.0621 0.1187 0.1703 0.2173 0.2602 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 2 2 3 4 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3123 2845 2592 2362 2152 ...
## ..$ dev.ratio: num [1:100] 0 0.0832 0.1522 0.2095 0.2571 ...
## ..$ nulldev : num 3.01e+10
## ..$ npasses : int 11270
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_sixteen_mat, y = train_sixteen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 1.05
## $ lambda.1se: num 9.76
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_sixteen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9552.88073215
## lag_7days_0 0.15433008
## lag_7days_1 0.23565925
## lag_7days_10 .
## lag_7days_11 -0.62362663
## lag_7days_12 -0.06271398
## lag_7days_13 0.02068546
## lag_7days_14 0.19814524
## lag_7days_15 0.22082879
## lag_7days_16 0.37622352
## lag_7days_17 0.53416053
## lag_7days_18 -0.77847305
## lag_7days_19 -0.31579543
## lag_7days_2 .
## lag_7days_20 -0.16760994
## lag_7days_21 0.10817179
## lag_7days_22 -0.09502273
## lag_7days_23 0.21360453
## lag_7days_3 0.60318212
## lag_7days_4 -0.62507128
## lag_7days_5 -1.51844789
## lag_7days_6 -0.20541894
## lag_7days_7 0.79443953
## lag_7days_8 0.58060746
## lag_7days_9 .
## lag_2days_0 -1.15686402
## lag_2days_1 -0.21666937
## lag_2days_10 .
## lag_2days_11 -0.34470271
## lag_2days_12 .
## lag_2days_13 2.08016128
## lag_2days_14 -1.19723118
## lag_2days_15 0.04609762
## lag_2days_16 .
## lag_2days_17 0.01257260
## lag_2days_18 0.49930123
## lag_2days_19 -0.03635838
## lag_2days_2 -1.37427082
## lag_2days_20 .
## lag_2days_21 .
## lag_2days_22 -0.37426619
## lag_2days_23 1.48628375
## lag_2days_3 .
## lag_2days_4 1.79047191
## lag_2days_5 1.02027382
## lag_2days_6 -0.06995046
## lag_2days_7 0.44338159
## lag_2days_8 -0.15105396
## lag_2days_9 -1.14804017
plot(fit_cvglmnet_sixteen)
fit_cvglmnet_sixteen
##
## Call: cv.glmnet(x = train_sixteen_mat, y = train_sixteen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 1.047 4288902 287127 39
## 1se 9.760 4569988 322420 22
test_sixteen_pred=predict(fit_cvglmnet_sixteen, newx=test_sixteen_mat,s=c("lambda.min"))
test_sixteen_actual=final_feature_set_sixteen[c(1514:1593),c(50)]
test_sixteen_mape=((test_sixteen_actual-test_sixteen_pred)/test_sixteen_actual)*100
summary(test_sixteen_mape)
## value
## Min. :-34.65512
## 1st Qu.:-12.15313
## Median : -5.39570
## Mean : -6.58900
## 3rd Qu.: -0.00221
## Max. : 11.77699
boxplot(test_sixteen_mape)
###-----------------------------------------------------------------------------------
###SEVENTEEN
str(final_feature_set_seventeen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 37774 35045 32006 37368 36566 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_seventeen<- final_feature_set_seventeen[c(1:1513),c(2:50)]
str(train_seventeen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 37774 35045 32006 37368 36566 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_seventeen <- final_feature_set_seventeen[c(1514:1593),c(2:49)]
train_seventeen_mat= as.matrix(train_seventeen[,-49])
train_seventeen_target= train_seventeen[,49]
train_seventeen_target_mat= as.matrix(train_seventeen_target)
test_seventeen_mat= as.matrix(test_seventeen)
fit_cvglmnet_seventeen=cv.glmnet(train_seventeen_mat,train_seventeen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_seventeen)
## List of 11
## $ lambda : num [1:100] 3043 2773 2526 2302 2097 ...
## $ cvm : num [1:100] 17757508 16306755 14998721 13912886 13011514 ...
## $ cvsd : num [1:100] 545396 551090 560426 577681 599675 ...
## $ cvup : num [1:100] 18302904 16857845 15559147 14490568 13611189 ...
## $ cvlo : num [1:100] 17212113 15755666 14438295 13335205 12411839 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 1 1 2 3 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_seventeen_mat, y = train_seventeen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35862 33566 31474 29568 27832 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1931] 9 9 9 9 9 9 9 38 9 34 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 5 6 8 11 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1931] 0.064 0.122 0.175 0.224 0.268 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 1 1 2 3 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 3043 2773 2526 2302 2097 ...
## ..$ dev.ratio: num [1:100] 0 0.0882 0.1615 0.2223 0.2728 ...
## ..$ nulldev : num 2.7e+10
## ..$ npasses : int 10041
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_seventeen_mat, y = train_seventeen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.584
## $ lambda.1se: num 15.1
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_seventeen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9.641623e+03
## lag_7days_0 2.716494e-02
## lag_7days_1 1.992734e-01
## lag_7days_10 .
## lag_7days_11 -6.350487e-01
## lag_7days_12 -1.918230e-02
## lag_7days_13 1.107885e-02
## lag_7days_14 3.156739e-01
## lag_7days_15 9.747940e-03
## lag_7days_16 1.413534e-01
## lag_7days_17 8.441449e-01
## lag_7days_18 -6.783357e-01
## lag_7days_19 -3.584915e-01
## lag_7days_2 .
## lag_7days_20 -1.678095e-01
## lag_7days_21 1.994991e-01
## lag_7days_22 -1.934341e-01
## lag_7days_23 2.314956e-01
## lag_7days_3 6.450756e-01
## lag_7days_4 -5.740714e-01
## lag_7days_5 -1.493785e+00
## lag_7days_6 -5.610769e-03
## lag_7days_7 7.538689e-01
## lag_7days_8 4.677477e-01
## lag_7days_9 .
## lag_2days_0 -1.119084e+00
## lag_2days_1 -2.208706e-01
## lag_2days_10 .
## lag_2days_11 -2.788594e-01
## lag_2days_12 1.722230e-02
## lag_2days_13 1.890338e+00
## lag_2days_14 -1.279397e+00
## lag_2days_15 .
## lag_2days_16 .
## lag_2days_17 1.829986e-01
## lag_2days_18 6.272736e-01
## lag_2days_19 -1.602356e-01
## lag_2days_2 -1.132628e+00
## lag_2days_20 4.756332e-02
## lag_2days_21 4.541146e-02
## lag_2days_22 -4.798722e-01
## lag_2days_23 1.423948e+00
## lag_2days_3 -3.379820e-02
## lag_2days_4 1.522326e+00
## lag_2days_5 1.052677e+00
## lag_2days_6 .
## lag_2days_7 3.585776e-01
## lag_2days_8 -1.996700e-01
## lag_2days_9 -1.016688e+00
plot(fit_cvglmnet_seventeen)
fit_cvglmnet_seventeen
##
## Call: cv.glmnet(x = train_seventeen_mat, y = train_seventeen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.584 3415848 440836 41
## 1se 15.145 3841377 490663 20
test_seventeen_pred=predict(fit_cvglmnet_seventeen, newx=test_seventeen_mat,s=c("lambda.min"))
test_seventeen_actual=final_feature_set_seventeen[c(1514:1593),c(50)]
test_seventeen_mape=((test_seventeen_actual-test_seventeen_pred)/test_seventeen_actual)*100
summary(test_seventeen_mape)
## value
## Min. :-27.1074
## 1st Qu.:-10.6183
## Median : -4.5894
## Mean : -5.6271
## 3rd Qu.: -0.4155
## Max. : 11.1816
boxplot(test_seventeen_mape)
###-----------------------------------------------------------------------------------
###EIGHTEEN
str(final_feature_set_eighteen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 35931 34407 31706 35697 35056 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_eighteen<- final_feature_set_eighteen[c(1:1513),c(2:50)]
str(train_eighteen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 35931 34407 31706 35697 35056 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_eighteen <- final_feature_set_eighteen[c(1514:1593),c(2:49)]
train_eighteen_mat= as.matrix(train_eighteen[,-49])
train_eighteen_target= train_eighteen[,49]
train_eighteen_target_mat= as.matrix(train_eighteen_target)
test_eighteen_mat= as.matrix(test_eighteen)
fit_cvglmnet_eighteen=cv.glmnet(train_eighteen_mat,train_eighteen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_eighteen)
## List of 11
## $ lambda : num [1:100] 2696 2457 2238 2040 1858 ...
## $ cvm : num [1:100] 13691950 12502868 11474554 10620867 9908630 ...
## $ cvsd : num [1:100] 458782 436463 419314 408128 402324 ...
## $ cvup : num [1:100] 14150732 12939330 11893868 11028994 10310954 ...
## $ cvlo : num [1:100] 13233168 12066405 11055240 10212739 9506306 ...
## $ nzero : Named int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_eighteen_mat, y = train_eighteen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35810 33493 31382 29459 27706 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1829] 10 10 10 10 10 34 10 34 10 34 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 3 4 6 8 10 12 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1829] 0.0647 0.1237 0.1774 0.2264 0.2618 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 1 1 2 2 2 2 2 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2696 2457 2238 2040 1858 ...
## ..$ dev.ratio: num [1:100] 0 0.0901 0.1649 0.2271 0.2786 ...
## ..$ nulldev : num 2.07e+10
## ..$ npasses : int 8889
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_eighteen_mat, y = train_eighteen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.75
## $ lambda.1se: num 8.43
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_eighteen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.715660e+03
## lag_7days_0 .
## lag_7days_1 1.629731e-01
## lag_7days_10 -1.008977e-02
## lag_7days_11 -5.589541e-01
## lag_7days_12 .
## lag_7days_13 4.648877e-02
## lag_7days_14 3.106312e-01
## lag_7days_15 6.128081e-03
## lag_7days_16 1.476445e-01
## lag_7days_17 2.397781e-01
## lag_7days_18 -1.227296e-01
## lag_7days_19 -2.276554e-01
## lag_7days_2 .
## lag_7days_20 -1.890585e-01
## lag_7days_21 1.424165e-01
## lag_7days_22 -1.069654e-01
## lag_7days_23 1.080548e-01
## lag_7days_3 3.681971e-01
## lag_7days_4 -3.632538e-01
## lag_7days_5 -1.118653e+00
## lag_7days_6 .
## lag_7days_7 6.766388e-01
## lag_7days_8 3.229940e-01
## lag_7days_9 .
## lag_2days_0 -8.935375e-01
## lag_2days_1 -1.969431e-01
## lag_2days_10 .
## lag_2days_11 -3.021151e-01
## lag_2days_12 3.852725e-02
## lag_2days_13 1.523522e+00
## lag_2days_14 -9.426052e-01
## lag_2days_15 .
## lag_2days_16 -1.483685e-01
## lag_2days_17 .
## lag_2days_18 8.903833e-01
## lag_2days_19 -1.053488e-01
## lag_2days_2 -9.832138e-01
## lag_2days_20 .
## lag_2days_21 1.237435e-01
## lag_2days_22 -5.244490e-01
## lag_2days_23 1.223694e+00
## lag_2days_3 .
## lag_2days_4 1.361437e+00
## lag_2days_5 7.492488e-01
## lag_2days_6 .
## lag_2days_7 3.110860e-01
## lag_2days_8 -1.294832e-01
## lag_2days_9 -8.627291e-01
plot(fit_cvglmnet_eighteen)
fit_cvglmnet_eighteen
##
## Call: cv.glmnet(x = train_eighteen_mat, y = train_eighteen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.750 2373808 209398 37
## 1se 8.427 2571875 227339 24
test_eighteen_pred=predict(fit_cvglmnet_eighteen, newx=test_eighteen_mat,s=c("lambda.min"))
test_eighteen_actual=final_feature_set_eighteen[c(1514:1593),c(50)]
test_eighteen_mape=((test_eighteen_actual-test_eighteen_pred)/test_eighteen_actual)*100
summary(test_eighteen_mape)
## value
## Min. :-22.6800
## 1st Qu.: -8.7443
## Median : -3.7347
## Mean : -4.6950
## 3rd Qu.: -0.5032
## Max. : 8.2474
boxplot(test_eighteen_mape)
###-----------------------------------------------------------------------------------
###NINETEEN
str(final_feature_set_nineteen)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 34710 33494 31306 34482 33913 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_nineteen<- final_feature_set_nineteen[c(1:1513),c(2:50)]
str(train_nineteen)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 34710 33494 31306 34482 33913 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_nineteen <- final_feature_set_nineteen[c(1514:1593),c(2:49)]
train_nineteen_mat= as.matrix(train_nineteen[,-49])
train_nineteen_target= train_nineteen[,49]
train_nineteen_target_mat= as.matrix(train_nineteen_target)
test_nineteen_mat= as.matrix(test_nineteen)
fit_cvglmnet_nineteen=cv.glmnet(train_nineteen_mat,train_nineteen_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_nineteen)
## List of 11
## $ lambda : num [1:100] 2305 2100 1914 1744 1589 ...
## $ cvm : num [1:100] 10444855 9609285 8857517 8141401 7444627 ...
## $ cvsd : num [1:100] 560106 553197 530260 513806 486650 ...
## $ cvup : num [1:100] 11004960 10162482 9387777 8655207 7931277 ...
## $ cvlo : num [1:100] 9884749 9056088 8327257 7627595 6957977 ...
## $ nzero : Named int [1:100] 0 1 1 2 3 3 3 3 3 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_nineteen_mat, y = train_nineteen_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 35850 33870 32065 30047 27849 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:2013] 10 10 10 35 10 35 37 10 35 37 ...
## .. .. ..@ p : int [1:101] 0 0 1 2 4 7 10 13 16 19 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:2013] 0.0553 0.1057 0.1444 0.0177 0.1728 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 1 1 2 3 3 3 3 3 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2305 2100 1914 1744 1589 ...
## ..$ dev.ratio: num [1:100] 0 0.086 0.157 0.227 0.294 ...
## ..$ nulldev : num 1.59e+10
## ..$ npasses : int 9100
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_nineteen_mat, y = train_nineteen_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.532
## $ lambda.1se: num 10.5
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_nineteen,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 7.923909e+03
## lag_7days_0 1.221874e-02
## lag_7days_1 1.627446e-01
## lag_7days_10 -5.451120e-02
## lag_7days_11 -5.554864e-01
## lag_7days_12 .
## lag_7days_13 1.230754e-01
## lag_7days_14 1.663756e-01
## lag_7days_15 1.650270e-01
## lag_7days_16 1.357187e-01
## lag_7days_17 1.956275e-02
## lag_7days_18 -2.233044e-01
## lag_7days_19 4.821804e-02
## lag_7days_2 .
## lag_7days_20 -9.351693e-02
## lag_7days_21 1.106889e-01
## lag_7days_22 -2.199388e-01
## lag_7days_23 1.606596e-01
## lag_7days_3 3.299555e-01
## lag_7days_4 -4.045631e-01
## lag_7days_5 -9.033866e-01
## lag_7days_6 -2.733928e-02
## lag_7days_7 6.098833e-01
## lag_7days_8 3.366069e-01
## lag_7days_9 .
## lag_2days_0 -6.727636e-01
## lag_2days_1 -3.487088e-01
## lag_2days_10 -6.335044e-03
## lag_2days_11 -2.630658e-01
## lag_2days_12 9.549740e-02
## lag_2days_13 1.323250e+00
## lag_2days_14 -9.250582e-01
## lag_2days_15 .
## lag_2days_16 -1.895091e-02
## lag_2days_17 -3.449875e-07
## lag_2days_18 2.103100e-01
## lag_2days_19 4.729375e-01
## lag_2days_2 -7.352291e-01
## lag_2days_20 -9.353567e-03
## lag_2days_21 1.390276e-01
## lag_2days_22 -6.310484e-01
## lag_2days_23 1.254286e+00
## lag_2days_3 -3.946332e-02
## lag_2days_4 1.113185e+00
## lag_2days_5 6.955619e-01
## lag_2days_6 5.559108e-02
## lag_2days_7 1.936246e-01
## lag_2days_8 -1.097770e-01
## lag_2days_9 -7.184019e-01
plot(fit_cvglmnet_nineteen)
fit_cvglmnet_nineteen
##
## Call: cv.glmnet(x = train_nineteen_mat, y = train_nineteen_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.532 1968837 244338 44
## 1se 10.453 2201389 295019 22
test_nineteen_pred=predict(fit_cvglmnet_nineteen, newx=test_nineteen_mat,s=c("lambda.min"))
test_nineteen_actual=final_feature_set_nineteen[c(1514:1593),c(50)]
test_nineteen_mape=((test_nineteen_actual-test_nineteen_pred)/test_nineteen_actual)*100
summary(test_nineteen_mape)
## value
## Min. :-18.137
## 1st Qu.: -7.471
## Median : -2.867
## Mean : -3.644
## 3rd Qu.: -0.247
## Max. : 6.446
boxplot(test_nineteen_mape)
###-----------------------------------------------------------------------------------
###TWENTY
str(final_feature_set_twenty)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 33908 32624 30870 33527 33132 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_twenty<- final_feature_set_twenty[c(1:1513),c(2:50)]
str(train_twenty)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 33908 32624 30870 33527 33132 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_twenty <- final_feature_set_twenty[c(1514:1593),c(2:49)]
train_twenty_mat= as.matrix(train_twenty[,-49])
train_twenty_target= train_twenty[,49]
train_twenty_target_mat= as.matrix(train_twenty_target)
test_twenty_mat= as.matrix(test_twenty)
fit_cvglmnet_twenty=cv.glmnet(train_twenty_mat,train_twenty_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_twenty)
## List of 11
## $ lambda : num [1:92] 2316 2110 1923 1752 1596 ...
## $ cvm : num [1:92] 10017585 9163178 8358004 7569883 6899979 ...
## $ cvsd : num [1:92] 310277 299983 275435 245131 218652 ...
## $ cvup : num [1:92] 10327862 9463161 8633439 7815014 7118631 ...
## $ cvlo : num [1:92] 9707308 8863194 8082569 7324752 6681326 ...
## $ nzero : Named int [1:92] 0 1 2 3 3 4 4 4 4 3 ...
## ..- attr(*, "names")= chr [1:92] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_twenty_mat, y = train_twenty_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:92] 35730 33431 31195 28754 26523 ...
## .. ..- attr(*, "names")= chr [1:92] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1523] 14 14 38 14 37 38 14 37 38 13 ...
## .. .. ..@ p : int [1:93] 0 0 1 3 6 9 13 17 21 25 ...
## .. .. ..@ Dim : int [1:2] 48 92
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:92] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1523] 0.06556 0.11988 0.00944 0.15466 0.01611 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:92] 0 1 2 3 3 4 4 4 4 3 ...
## ..$ dim : int [1:2] 48 92
## ..$ lambda : num [1:92] 2316 2110 1923 1752 1596 ...
## ..$ dev.ratio: num [1:92] 0 0.0907 0.1708 0.2505 0.3168 ...
## ..$ nulldev : num 1.52e+10
## ..$ npasses : int 8942
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_twenty_mat, y = train_twenty_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.488
## $ lambda.1se: num 7.24
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_twenty,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 7.306695e+03
## lag_7days_0 4.050886e-02
## lag_7days_1 1.057808e-01
## lag_7days_10 -5.905346e-06
## lag_7days_11 -6.124683e-01
## lag_7days_12 .
## lag_7days_13 9.803221e-02
## lag_7days_14 1.613635e-01
## lag_7days_15 2.312850e-01
## lag_7days_16 5.860415e-02
## lag_7days_17 4.229896e-02
## lag_7days_18 -2.518009e-01
## lag_7days_19 -1.237587e-01
## lag_7days_2 -2.910384e-02
## lag_7days_20 -2.249375e-02
## lag_7days_21 2.457057e-01
## lag_7days_22 -1.576667e-01
## lag_7days_23 1.673565e-01
## lag_7days_3 4.014003e-01
## lag_7days_4 -4.116266e-01
## lag_7days_5 -8.314738e-01
## lag_7days_6 -4.817789e-02
## lag_7days_7 5.476991e-01
## lag_7days_8 3.330808e-01
## lag_7days_9 -1.070589e-05
## lag_2days_0 -4.619947e-01
## lag_2days_1 -2.040391e-01
## lag_2days_10 .
## lag_2days_11 -1.142823e-01
## lag_2days_12 3.157555e-02
## lag_2days_13 1.181636e+00
## lag_2days_14 -8.648592e-01
## lag_2days_15 1.936634e-04
## lag_2days_16 -7.113432e-03
## lag_2days_17 -4.284410e-05
## lag_2days_18 1.669711e-01
## lag_2days_19 -1.079033e-01
## lag_2days_2 -1.010963e+00
## lag_2days_20 4.823897e-01
## lag_2days_21 3.235214e-01
## lag_2days_22 -6.534048e-01
## lag_2days_23 1.144241e+00
## lag_2days_3 -3.466721e-02
## lag_2days_4 9.490884e-01
## lag_2days_5 8.116980e-01
## lag_2days_6 -1.304602e-03
## lag_2days_7 1.591579e-01
## lag_2days_8 -1.138166e-01
## lag_2days_9 -6.663804e-01
plot(fit_cvglmnet_twenty)
fit_cvglmnet_twenty
##
## Call: cv.glmnet(x = train_twenty_mat, y = train_twenty_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.488 1727034 157553 46
## 1se 7.240 1881790 178167 22
test_twenty_pred=predict(fit_cvglmnet_twenty, newx=test_twenty_mat,s=c("lambda.min"))
test_twenty_actual=final_feature_set_twenty[c(1514:1593),c(50)]
test_twenty_mape=((test_twenty_actual-test_twenty_pred)/test_twenty_actual)*100
summary(test_twenty_mape)
## value
## Min. :-16.7291
## 1st Qu.: -6.7509
## Median : -2.5457
## Mean : -3.2308
## 3rd Qu.: -0.4838
## Max. : 6.4037
boxplot(test_twenty_mape)
###-----------------------------------------------------------------------------------
###TWENTYONE
str(final_feature_set_TWENTYone)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 33049 32098 30463 32634 32333 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_TWENTYone<- final_feature_set_TWENTYone[c(1:1513),c(2:50)]
str(train_TWENTYone)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 33049 32098 30463 32634 32333 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_TWENTYone <- final_feature_set_TWENTYone[c(1514:1593),c(2:49)]
train_TWENTYone_mat= as.matrix(train_TWENTYone[,-49])
train_TWENTYone_target= train_TWENTYone[,49]
train_TWENTYone_target_mat= as.matrix(train_TWENTYone_target)
test_TWENTYone_mat= as.matrix(test_TWENTYone)
fit_cvglmnet_TWENTYone=cv.glmnet(train_TWENTYone_mat,train_TWENTYone_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_TWENTYone)
## List of 11
## $ lambda : num [1:99] 2294 2090 1904 1735 1581 ...
## $ cvm : num [1:99] 9791263 8920433 7998075 7212501 6559604 ...
## $ cvsd : num [1:99] 395712 376599 335749 296598 264586 ...
## $ cvup : num [1:99] 10186975 9297031 8333824 7509099 6824190 ...
## $ cvlo : num [1:99] 9395551 8543834 7662326 6915904 6295019 ...
## $ nzero : Named int [1:99] 0 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "names")= chr [1:99] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_TWENTYone_mat, y = train_TWENTYone_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:99] 35087 32751 30115 27713 25524 ...
## .. ..- attr(*, "names")= chr [1:99] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1829] 14 38 14 38 14 38 14 38 14 38 ...
## .. .. ..@ p : int [1:100] 0 0 2 4 6 8 10 12 14 16 ...
## .. .. ..@ Dim : int [1:2] 48 99
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:99] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1829] 0.06267 0.00395 0.10023 0.04155 0.13444 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:99] 0 2 2 2 2 2 2 2 2 2 ...
## ..$ dim : int [1:2] 48 99
## ..$ lambda : num [1:99] 2294 2090 1904 1735 1581 ...
## ..$ dev.ratio: num [1:99] 0 0.0933 0.1892 0.2689 0.335 ...
## ..$ nulldev : num 1.48e+10
## ..$ npasses : int 7987
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_TWENTYone_mat, y = train_TWENTYone_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.252
## $ lambda.1se: num 4.5
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_TWENTYone,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.011188e+03
## lag_7days_0 3.644993e-02
## lag_7days_1 1.698731e-01
## lag_7days_10 -2.174567e-02
## lag_7days_11 -5.723124e-01
## lag_7days_12 .
## lag_7days_13 9.997523e-02
## lag_7days_14 2.062232e-01
## lag_7days_15 1.153361e-01
## lag_7days_16 8.843494e-02
## lag_7days_17 1.072500e-01
## lag_7days_18 -3.441725e-01
## lag_7days_19 -1.359434e-01
## lag_7days_2 -1.147126e-01
## lag_7days_20 -2.451180e-01
## lag_7days_21 5.308450e-01
## lag_7days_22 -1.278132e-01
## lag_7days_23 1.229770e-01
## lag_7days_3 3.408638e-01
## lag_7days_4 -1.638956e-01
## lag_7days_5 -1.028374e+00
## lag_7days_6 .
## lag_7days_7 5.039297e-01
## lag_7days_8 3.541188e-01
## lag_7days_9 .
## lag_2days_0 -4.523696e-01
## lag_2days_1 -2.410062e-01
## lag_2days_10 -4.395039e-03
## lag_2days_11 -1.677079e-01
## lag_2days_12 7.630930e-02
## lag_2days_13 1.120423e+00
## lag_2days_14 -8.552402e-01
## lag_2days_15 3.121559e-02
## lag_2days_16 -3.235326e-03
## lag_2days_17 -2.427621e-02
## lag_2days_18 2.328724e-01
## lag_2days_19 -1.523647e-01
## lag_2days_2 -9.035821e-01
## lag_2days_20 1.541492e-01
## lag_2days_21 6.386354e-01
## lag_2days_22 -7.057835e-01
## lag_2days_23 1.175914e+00
## lag_2days_3 .
## lag_2days_4 1.046724e+00
## lag_2days_5 5.401853e-01
## lag_2days_6 2.248224e-02
## lag_2days_7 1.909940e-01
## lag_2days_8 -7.521699e-02
## lag_2days_9 -6.477339e-01
plot(fit_cvglmnet_TWENTYone)
fit_cvglmnet_TWENTYone
##
## Call: cv.glmnet(x = train_TWENTYone_mat, y = train_TWENTYone_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.252 1623170 92301 44
## 1se 4.503 1707509 98669 28
test_TWENTYone_pred=predict(fit_cvglmnet_TWENTYone, newx=test_TWENTYone_mat,s=c("lambda.min"))
test_TWENTYone_actual=final_feature_set_TWENTYone[c(1514:1593),c(50)]
test_TWENTYone_mape=((test_TWENTYone_actual-test_TWENTYone_pred)/test_TWENTYone_actual)*100
summary(test_TWENTYone_mape)
## value
## Min. :-17.4586
## 1st Qu.: -6.4964
## Median : -2.8144
## Mean : -3.4422
## 3rd Qu.: -0.2452
## Max. : 6.5431
boxplot(test_TWENTYone_mape)
###-----------------------------------------------------------------------------------
###TWENTYTWO
str(final_feature_set_twentytwo)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 33220 32177 30538 32791 32523 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_twentytwo<- final_feature_set_twentytwo[c(1:1513),c(2:50)]
str(train_twentytwo)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 33220 32177 30538 32791 32523 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_twentytwo <- final_feature_set_twentytwo[c(1514:1593),c(2:49)]
train_twentytwo_mat= as.matrix(train_twentytwo[,-49])
train_twentytwo_target= train_twentytwo[,49]
train_twentytwo_target_mat= as.matrix(train_twentytwo_target)
test_twentytwo_mat= as.matrix(test_twentytwo)
fit_cvglmnet_twentytwo=cv.glmnet(train_twentytwo_mat,train_twentytwo_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_twentytwo)
## List of 11
## $ lambda : num [1:100] 2136 1946 1773 1615 1472 ...
## $ cvm : num [1:100] 8922467 8062128 7243647 6561103 5993567 ...
## $ cvsd : num [1:100] 234743 228163 220015 213659 209054 ...
## $ cvup : num [1:100] 9157211 8290291 7463662 6774762 6202621 ...
## $ cvlo : num [1:100] 8687724 7833964 7023631 6347444 5784513 ...
## $ nzero : Named int [1:100] 0 3 3 3 3 3 3 3 3 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_twentytwo_mat, y = train_twentytwo_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 34451 31983 29513 27260 25203 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1855] 14 16 40 14 16 40 14 16 40 14 ...
## .. .. ..@ p : int [1:101] 0 0 3 6 9 12 15 18 21 24 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1855] 0.00501 0.04477 0.02472 0.03674 0.04818 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 3 3 3 3 3 3 3 3 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2136 1946 1773 1615 1472 ...
## ..$ dev.ratio: num [1:100] 0 0.1 0.192 0.268 0.332 ...
## ..$ nulldev : num 1.35e+10
## ..$ npasses : int 7907
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_twentytwo_mat, y = train_twentytwo_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.373
## $ lambda.1se: num 7.33
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_twentytwo,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.520625e+03
## lag_7days_0 7.479722e-02
## lag_7days_1 1.057761e-01
## lag_7days_10 -5.335138e-02
## lag_7days_11 -4.549992e-01
## lag_7days_12 -6.142264e-02
## lag_7days_13 1.229532e-01
## lag_7days_14 1.823194e-01
## lag_7days_15 4.983586e-02
## lag_7days_16 2.024387e-01
## lag_7days_17 7.481926e-02
## lag_7days_18 -3.009693e-01
## lag_7days_19 -1.974967e-01
## lag_7days_2 -9.343987e-02
## lag_7days_20 -3.209183e-01
## lag_7days_21 3.325940e-01
## lag_7days_22 .
## lag_7days_23 2.479723e-01
## lag_7days_3 2.741141e-01
## lag_7days_4 -1.798263e-01
## lag_7days_5 -8.760706e-01
## lag_7days_6 .
## lag_7days_7 4.721560e-01
## lag_7days_8 3.246067e-01
## lag_7days_9 -9.788320e-06
## lag_2days_0 -5.133232e-01
## lag_2days_1 -1.677879e-01
## lag_2days_10 .
## lag_2days_11 -4.784869e-02
## lag_2days_12 8.004522e-02
## lag_2days_13 1.037969e+00
## lag_2days_14 -8.150469e-01
## lag_2days_15 6.652781e-03
## lag_2days_16 .
## lag_2days_17 -2.523212e-04
## lag_2days_18 1.694732e-01
## lag_2days_19 -6.987326e-02
## lag_2days_2 -8.513186e-01
## lag_2days_20 9.520534e-03
## lag_2days_21 3.539480e-01
## lag_2days_22 -3.884438e-01
## lag_2days_23 1.209709e+00
## lag_2days_3 -5.910940e-04
## lag_2days_4 9.801574e-01
## lag_2days_5 5.656298e-01
## lag_2days_6 3.607354e-03
## lag_2days_7 1.659675e-01
## lag_2days_8 -3.823756e-02
## lag_2days_9 -7.093280e-01
plot(fit_cvglmnet_twentytwo)
fit_cvglmnet_twentytwo
##
## Call: cv.glmnet(x = train_twentytwo_mat, y = train_twentytwo_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.373 1629266 141949 44
## 1se 7.326 1754296 171026 20
test_twentytwo_pred=predict(fit_cvglmnet_twentytwo, newx=test_twentytwo_mat,s=c("lambda.min"))
test_twentytwo_actual=final_feature_set_twentytwo[c(1514:1593),c(50)]
test_twentytwo_mape=((test_twentytwo_actual-test_twentytwo_pred)/test_twentytwo_actual)*100
summary(test_twentytwo_mape)
## value
## Min. :-17.3479
## 1st Qu.: -6.6931
## Median : -2.9432
## Mean : -3.4672
## 3rd Qu.: -0.1018
## Max. : 6.8285
boxplot(test_twentytwo_mape)
###-----------------------------------------------------------------------------------
###TWENTYTHREE
str(final_feature_set_twentythree)
## Classes 'data.table' and 'data.frame': 1593 obs. of 50 variables:
## $ date : Date, format: "2016-01-08" "2016-01-09" ...
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 32022 30760 28858 31148 30907 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "date"
train_twentythree<- final_feature_set_twentythree[c(1:1513),c(2:50)]
str(train_twentythree)
## Classes 'data.table' and 'data.frame': 1513 obs. of 49 variables:
## $ lag_7days_0 : num 26277 26225 27614 27103 29244 ...
## $ lag_7days_1 : num 24992 24709 25779 25440 27637 ...
## $ lag_7days_10: num 27225 32102 28780 38607 37776 ...
## $ lag_7days_11: num 28908 33432 30204 39660 38664 ...
## $ lag_7days_12: num 28789 32911 30588 38416 37522 ...
## $ lag_7days_13: num 29368 32888 30903 39279 37821 ...
## $ lag_7days_14: num 29548 32796 31056 39662 38086 ...
## $ lag_7days_15: num 29391 32595 30980 39211 37835 ...
## $ lag_7days_16: num 30735 33358 32297 39760 38599 ...
## $ lag_7days_17: num 32048 34388 33463 39633 38670 ...
## $ lag_7days_18: num 31438 33591 33151 37621 36955 ...
## $ lag_7days_19: num 30728 32649 32450 35954 35306 ...
## $ lag_7days_2 : num 23533 23772 24566 24395 26336 ...
## $ lag_7days_20: num 30166 31898 32094 35114 34302 ...
## $ lag_7days_21: num 29461 31049 31315 34066 33401 ...
## $ lag_7days_22: num 29243 30906 31014 33687 33359 ...
## $ lag_7days_23: num 28069 29621 29462 31734 31630 ...
## $ lag_7days_3 : num 22465 22921 23878 23961 25870 ...
## $ lag_7days_4 : num 22003 22871 23511 23861 25975 ...
## $ lag_7days_5 : num 21957 23326 23672 24418 26318 ...
## $ lag_7days_6 : num 22204 23605 23827 25709 27109 ...
## $ lag_7days_7 : num 21844 24023 23405 27455 28570 ...
## $ lag_7days_8 : num 23095 26930 24639 32982 33106 ...
## $ lag_7days_9 : num 25202 30044 26802 37176 36657 ...
## $ lag_2days_0 : num 29189 28764 28602 29907 28890 ...
## $ lag_2days_1 : num 27614 27285 27112 28062 27134 ...
## $ lag_2days_10: num 37213 36501 36843 34793 28136 ...
## $ lag_2days_11: num 37845 37351 37140 35383 29059 ...
## $ lag_2days_12: num 36194 35901 34623 34131 28947 ...
## $ lag_2days_13: num 36543 36800 36135 33828 28945 ...
## $ lag_2days_14: num 36847 37377 36556 33525 29090 ...
## $ lag_2days_15: num 36434 37100 36190 32951 29448 ...
## $ lag_2days_16: num 37581 37669 37032 33656 30569 ...
## $ lag_2days_17: num 37305 37907 37774 35045 32006 ...
## $ lag_2days_18: num 35832 35842 35931 34407 31706 ...
## $ lag_2days_19: num 34500 34622 34710 33494 31306 ...
## $ lag_2days_2 : num 26579 26322 25975 26809 25656 ...
## $ lag_2days_20: num 33670 33785 33908 32624 30870 ...
## $ lag_2days_21: num 32931 32638 33049 32098 30463 ...
## $ lag_2days_22: num 32913 32740 33220 32177 30538 ...
## $ lag_2days_23: num 31451 31093 32022 30760 28858 ...
## $ lag_2days_3 : num 25719 25748 25316 25799 24938 ...
## $ lag_2days_4 : num 25865 25637 25128 25820 24538 ...
## $ lag_2days_5 : num 25919 25933 25356 26036 24616 ...
## $ lag_2days_6 : num 27092 26964 26339 26451 24478 ...
## $ lag_2days_7 : num 28533 28445 28086 26853 23813 ...
## $ lag_2days_8 : num 33203 32804 32702 30627 24677 ...
## $ lag_2days_9 : num 36257 35608 35788 33468 26693 ...
## $ value : num 32022 30760 28858 31148 30907 ...
## - attr(*, ".internal.selfref")=<externalptr>
test_twentythree <- final_feature_set_twentythree[c(1514:1593),c(2:49)]
train_twentythree_mat= as.matrix(train_twentythree[,-49])
train_twentythree_target= train_twentythree[,49]
train_twentythree_target_mat= as.matrix(train_twentythree_target)
test_twentythree_mat= as.matrix(test_twentythree)
fit_cvglmnet_twentythree=cv.glmnet(train_twentythree_mat,train_twentythree_target_mat,nfolds=10,alpha = 1)
str(fit_cvglmnet_twentythree)
## List of 11
## $ lambda : num [1:100] 2127 1938 1766 1609 1466 ...
## $ cvm : num [1:100] 8608932 7759755 6952511 6280752 5723428 ...
## $ cvsd : num [1:100] 340152 325599 295784 271167 251265 ...
## $ cvup : num [1:100] 8949084 8085354 7248294 6551918 5974693 ...
## $ cvlo : num [1:100] 8268780 7434157 6656727 6009585 5472163 ...
## $ nzero : Named int [1:100] 0 2 2 2 2 2 3 3 3 3 ...
## ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## $ call : language cv.glmnet(x = train_twentythree_mat, y = train_twentythree_target_mat, nfolds = 10, alpha = 1)
## $ name : Named chr "Mean-Squared Error"
## ..- attr(*, "names")= chr "mse"
## $ glmnet.fit:List of 12
## ..$ a0 : Named num [1:100] 32994 30412 27973 25751 23726 ...
## .. ..- attr(*, "names")= chr [1:100] "s0" "s1" "s2" "s3" ...
## ..$ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. ..@ i : int [1:1744] 16 40 16 40 16 40 16 40 16 40 ...
## .. .. ..@ p : int [1:101] 0 0 2 4 6 8 10 13 16 19 ...
## .. .. ..@ Dim : int [1:2] 48 100
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : chr [1:48] "lag_7days_0" "lag_7days_1" "lag_7days_10" "lag_7days_11" ...
## .. .. .. ..$ : chr [1:100] "s0" "s1" "s2" "s3" ...
## .. .. ..@ x : num [1:1744] 0.0445 0.0338 0.0815 0.0708 0.1151 ...
## .. .. ..@ factors : list()
## ..$ df : int [1:100] 0 2 2 2 2 2 3 3 3 3 ...
## ..$ dim : int [1:2] 48 100
## ..$ lambda : num [1:100] 2127 1938 1766 1609 1466 ...
## ..$ dev.ratio: num [1:100] 0 0.108 0.201 0.279 0.343 ...
## ..$ nulldev : num 1.31e+10
## ..$ npasses : int 7571
## ..$ jerr : int 0
## ..$ offset : logi FALSE
## ..$ call : language glmnet(x = train_twentythree_mat, y = train_twentythree_target_mat, alpha = 1)
## ..$ nobs : int 1513
## ..- attr(*, "class")= chr [1:2] "elnet" "glmnet"
## $ lambda.min: num 0.65
## $ lambda.1se: num 6.06
## - attr(*, "class")= chr "cv.glmnet"
coef(fit_cvglmnet_twentythree,s='lambda.min')
## 49 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 7.014920e+03
## lag_7days_0 1.075874e-01
## lag_7days_1 1.282504e-01
## lag_7days_10 -3.211591e-03
## lag_7days_11 -4.418004e-01
## lag_7days_12 .
## lag_7days_13 1.961200e-02
## lag_7days_14 2.400484e-01
## lag_7days_15 .
## lag_7days_16 1.901157e-01
## lag_7days_17 3.370144e-02
## lag_7days_18 -2.012560e-01
## lag_7days_19 -1.914070e-01
## lag_7days_2 -1.308167e-02
## lag_7days_20 -2.996492e-01
## lag_7days_21 1.875687e-01
## lag_7days_22 .
## lag_7days_23 3.601964e-01
## lag_7days_3 8.754653e-02
## lag_7days_4 -1.838702e-01
## lag_7days_5 -8.313323e-01
## lag_7days_6 .
## lag_7days_7 5.306364e-01
## lag_7days_8 2.344061e-01
## lag_7days_9 .
## lag_2days_0 -4.428577e-01
## lag_2days_1 -2.663923e-01
## lag_2days_10 -3.573027e-02
## lag_2days_11 -7.717411e-02
## lag_2days_12 7.924516e-02
## lag_2days_13 9.525646e-01
## lag_2days_14 -7.280530e-01
## lag_2days_15 .
## lag_2days_16 .
## lag_2days_17 .
## lag_2days_18 1.365310e-01
## lag_2days_19 -6.352407e-02
## lag_2days_2 -5.463805e-01
## lag_2days_20 .
## lag_2days_21 3.916784e-01
## lag_2days_22 -4.875462e-01
## lag_2days_23 1.262710e+00
## lag_2days_3 .
## lag_2days_4 7.904101e-01
## lag_2days_5 4.905170e-01
## lag_2days_6 .
## lag_2days_7 1.089061e-01
## lag_2days_8 -1.476009e-02
## lag_2days_9 -6.102816e-01
plot(fit_cvglmnet_twentythree)
fit_cvglmnet_twentythree
##
## Call: cv.glmnet(x = train_twentythree_mat, y = train_twentythree_target_mat, nfolds = 10, alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Measure SE Nonzero
## min 0.650 1519730 98867 37
## 1se 6.058 1611292 109757 20
test_twentythree_pred=predict(fit_cvglmnet_twentythree, newx=test_twentythree_mat,s=c("lambda.min"))
test_twentythree_actual=final_feature_set_twentythree[c(1514:1593),c(50)]
test_twentythree_mape=((test_twentythree_actual-test_twentythree_pred)/test_twentythree_actual)*100
summary(test_twentythree_mape)
## value
## Min. :-15.69196
## 1st Qu.: -6.24460
## Median : -2.30001
## Mean : -2.86093
## 3rd Qu.: 0.07991
## Max. : 5.80022
boxplot(test_twentythree_mape)
###-----------------------------------------------------------------------------------